18 using System.Collections.Generic;
19 using TraceLabSDK.Types;
21 namespace TraceLab.Components.DevelopmentKit.Tracers.InformationRetrieval
26 public static class JSD
/// <summary>
/// Scores every document of matrices[0] against every document of matrices[1]
/// with DocumentSimilarity and records each pair as a link in a new
/// TLSimilarityMatrix.
/// </summary>
/// <param name="source">Source artifacts collection. Not referenced in the visible lines;
/// presumably used to build <c>matrices</c> - TODO confirm.</param>
/// <param name="target">Target artifacts collection. Not referenced in the visible lines;
/// presumably used to build <c>matrices</c> - TODO confirm.</param>
/// <returns>A TLSimilarityMatrix; the return statement is outside the visible lines,
/// presumably <c>sims</c> - TODO confirm.</returns>
34 public static TLSimilarityMatrix
Compute(TLArtifactsCollection source, TLArtifactsCollection target)
// NOTE(review): `matrices` is declared in lines not shown here. It is indexed with 0
// (outer loop, used as link source) and 1 (inner loop, used as link target).
48 TLSimilarityMatrix sims =
new TLSimilarityMatrix();
// Outer loop: one pass per document in matrices[0].
49 for (
int i = 0; i < matrices[0].NumDocs; i++)
// A fresh link list is created for each outer document.
51 TLLinksList list =
new TLLinksList();
// Inner loop: pair the current matrices[0] document with every matrices[1] document.
52 for (
int j = 0; j < matrices[1].NumDocs; j++)
// The link carries both document names and the DocumentSimilarity score of the pair.
54 list.Add(
new TLSingleLink(matrices[0].GetDocumentName(i), matrices[1].GetDocumentName(j),
55 DocumentSimilarity(matrices[0].GetDocument(i), matrices[1].GetDocument(j))));
// Copy the links collected for this outer document into the result matrix.
58 foreach (TLSingleLink link
in list)
60 sims.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
// Body of DocumentSimilarity (the method header is outside the visible lines).
// Normalizes both documents by their sums, computes the Jensen-Shannon divergence
// H((P + Q) / 2) - (H(P) + H(Q)) / 2 of the results, and turns it into a
// similarity as 1 - divergence.
76 double[] distribution1 =
new double[document1.Length];
77 double[] distribution2 =
new double[document2.Length];
78 double sum1 = 0, sum2 = 0;
// Total weight of each document.
// NOTE(review): the bound is document1.Length but document2 is indexed as well, so
// document2 must have at least that many elements; any extra document2 entries are
// never read - confirm both documents always have the same length.
79 for (
int i = 0; i < document1.Length; i++)
81 sum1 = sum1 + document1[i];
82 sum2 = sum2 + document2[i];
// Divide every entry by its document's total.
// NOTE(review): sum1 and sum2 are doubles, so a zero total does not throw here; the
// division yields NaN or Infinity instead - confirm whether empty documents can occur.
84 for (
int i = 0; i < document1.Length; i++)
86 distribution1[i] = document1[i] / sum1;
87 distribution2[i] = document2[i] / sum2;
// `temp` and `similarity` are declared in lines not shown here.
// temp becomes the element-wise average of the two distributions.
91 temp = sumDocument(distribution1, distribution2);
92 temp = mulDocument(0.5, temp);
// Entropy of the average minus the average of the two entropies.
93 similarity = entropy(temp);
94 similarity = similarity - (entropy(distribution1) + entropy(distribution2)) / 2;
// Convert the divergence into a similarity. entropy uses base-2 logarithms, so for
// two valid probability distributions the result should lie in [0, 1], with 1 for
// identical distributions - assumes the normalization above succeeded.
96 similarity = 1 - similarity;
/// <summary>
/// Accumulates the base-2 Shannon entropy term -p * log2(p) over the entries of
/// <paramref name="docDistrib"/>.
/// </summary>
/// <param name="docDistrib">Values to accumulate over; presumably a normalized
/// distribution - the method itself does not check this.</param>
/// <returns>The return statement is outside the visible lines; presumably the
/// accumulated <c>entropia</c> value - TODO confirm.</returns>
105 public static double entropy(
double[] docDistrib)
// `i` and `entropia` are declared in lines not shown here.
109 for (i = 0; i < docDistrib.Length; i++)
// Only strictly positive entries contribute, so Math.Log is never called on zero or
// a negative value (a NaN entry also fails this comparison and is skipped).
111 if (docDistrib[i] > 0)
113 entropia = entropia - docDistrib[i] * Math.Log(docDistrib[i], 2);
/// <summary>
/// Adds two vectors element by element into a newly allocated array.
/// </summary>
/// <param name="document1">First vector; its length determines the length of the result.</param>
/// <param name="document2">Second vector; it is indexed up to document1.Length, so it
/// must have at least that many elements.</param>
/// <returns>The return statement is outside the visible lines; presumably the
/// <c>sum</c> array - TODO confirm.</returns>
126 public static double[]
sumDocument(
double[] document1,
double[] document2)
// The result is sized from document1 only.
128 double[] sum =
new double[document1.Length];
130 for (
int i = 0; i < sum.Length; i++)
132 sum[i] = document1[i] + document2[i];
/// <summary>
/// Multiplies every element of a vector by a scalar, writing the products into a
/// newly allocated array of the same length.
/// </summary>
/// <param name="pScalar">Factor applied to each element.</param>
/// <param name="pVector">Vector to scale; it is only read in the visible lines.</param>
/// <returns>The return statement is outside the visible lines; presumably the
/// <c>mul</c> array - TODO confirm.</returns>
144 public static double[]
mulDocument(
double pScalar,
double[] pVector)
146 double[] mul =
new double[pVector.Length];
147 for (
int i = 0; i < mul.Length; i++)
148 mul[i] = pScalar * pVector[i];