18 using System.Collections.Generic;
21 using TraceLabSDK.Types;
23 namespace TraceLab.Components.DevelopmentKit.Utils
30 #region Simple statistics
// Fragment (enclosing signature not visible in this excerpt): forwards
// matrix.AllLinks to an AverageSimilarity overload and returns its result.
39 return AverageSimilarity(matrix.AllLinks);
// Fragment (signature, the declaration of sum and the loop body are not
// visible in this excerpt). Visits every TLSingleLink in list; presumably
// the hidden body adds link.Score to sum - TODO confirm against the full file.
51 foreach (TLSingleLink link
in list)
// Result is sum divided by the number of links in list.
// NOTE(review): no guard for list.Count == 0 is visible here - confirm how an
// empty list is meant to be handled.
56 return sum / list.Count;
// Fragment (enclosing signature not visible in this excerpt): forwards
// matrix.AllLinks to a SimilarityStandardDeviation overload and returns its result.
66 return SimilarityStandardDeviation(matrix.AllLinks);
// Fragment (signature not visible in this excerpt). Computes
// sqrt(mean(score * score) - average * average) over the links in list, where
// average is whatever AverageSimilarity(list) returns. Both divisions visible
// here use list.Count as the divisor.
76 double average = AverageSimilarity(list);
// Despite the name, this accumulates the sum of squared scores, not deviations.
77 double sumOfDerivation = 0;
79 foreach (TLSingleLink link
in list)
81 sumOfDerivation += link.Score * link.Score;
// Mean of the squared scores.
84 double sumOfDerivationAverage = sumOfDerivation / list.Count;
// NOTE(review): with near-identical scores, floating-point rounding may make
// the difference below slightly negative, in which case Math.Sqrt yields NaN.
// No empty-list guard is visible either. Confirm whether both cases matter.
85 return Math.Sqrt(sumOfDerivationAverage - (average * average));
// Fragment (signature and return not visible in this excerpt): creates an
// empty TLSimilarityMatrix and adds one entry per link in list, reusing the
// source id, target id and score of each link.
99 TLSimilarityMatrix matrix =
new TLSimilarityMatrix();
100 foreach (TLSingleLink link
in list)
102 matrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
// Fragment (signature, the body of the if and the return are not visible in
// this excerpt): walks matrix.AllLinks and selects links whose score is
// strictly greater than matrix.Threshold. Presumably the selected links are
// added to the new links list - TODO confirm against the full file.
119 TLLinksList links =
new TLLinksList();
120 foreach (TLSingleLink link
in matrix.AllLinks)
// Strict comparison: a score equal to the threshold is not selected.
122 if (link.Score > matrix.Threshold)
// Fragment (signature, the body of the if and the return are not visible in
// this excerpt): same shape as the matrix.Threshold filter, but compares each
// score against threshold, which is declared outside the visible lines.
// Presumably the selected links are added to links - TODO confirm.
136 TLLinksList links =
new TLLinksList();
137 foreach (TLSingleLink link
in matrix.AllLinks)
// Strict comparison: a score equal to threshold is not selected.
139 if (link.Score > threshold)
// Builds a new TLLinksList from the first topN entries of matrix.AllLinks.
// Throws DevelopmentKitException when the matrix holds fewer than topN links.
// NOTE(review): braces, the return statement and any ordering step are not
// visible in this excerpt. Whether the first entries really are the
// highest-scoring ones depends on ordering that must be confirmed in the full file.
151 public static TLLinksList
GetTopNLinks(TLSimilarityMatrix matrix,
int topN)
// Reject requests for more links than the matrix contains.
// NOTE(review): a negative topN passes this check and the copy loop below
// then never runs - confirm whether that is intended.
153 if (matrix.AllLinks.Count < topN)
155 throw new DevelopmentKitException(
"Matrix only has " + matrix.AllLinks.Count +
" links (" + topN +
" requested).");
161 TLLinksList links = matrix.AllLinks;
163 TLLinksList newLinks =
new TLLinksList();
// Copy entries 0 .. topN-1 in their current order.
164 for (
int i = 0; i < topN; i++)
166 newLinks.Add(links[i]);
// Collects links from matrix until a target number of correct links,
// derived from answerMatrix and level, has been reached.
// NOTE(review): large parts of this method are not visible in this excerpt
// (the body of the level check, the declaration and update of numCorrect, how
// entries leave links, the return). The comments below cover visible lines only.
178 public static TLLinksList
GetLinksAtRecall(TLSimilarityMatrix matrix, TLSimilarityMatrix answerMatrix,
double level)
// Range check: level must be greater than 0 and at most 1. The action taken
// on failure is not visible - presumably an exception; TODO confirm.
180 if (level <= 0.0 || level > 1.0)
// Number of correct links to reach, as a fraction of answerMatrix.Count.
184 double totalCorrect = answerMatrix.Count * level;
186 TLLinksList links = matrix.AllLinks;
188 TLLinksList newLinks =
new TLLinksList();
// Keep going while links remain and the correct-link target is not yet met.
189 while (links.Count > 0 && numCorrect < totalCorrect)
// Always inspects the current first entry; presumably the hidden part of the
// loop removes it from links afterwards - TODO confirm.
191 TLSingleLink link = links[0];
// Asks answerMatrix whether this source/target pair is above its threshold;
// presumably that marks the link as correct - TODO confirm.
192 if (answerMatrix.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
// Fragment (enclosing signature not visible in this excerpt): forwards
// matrix.AllLinks and percent to a RemoveTopPercentage overload.
210 return RemoveTopPercentage(matrix.AllLinks, percent);
// Fragment (signature, the body of the range check, any ordering step and
// the return are not visible in this excerpt): copies the tail of links,
// starting at a computed index, into a new list as fresh TLSingleLink objects.
// Range check: percent must lie strictly between 0 and 1. The action taken
// on failure is not visible - presumably an exception; TODO confirm.
221 if (percent <= 0.0 || percent >= 1.0)
225 TLLinksList remaining =
new TLLinksList();
// NOTE(review): possible off-by-one. For links.Count = 10 and percent = 0.5
// this gives Ceiling(5) - 1 = 4, so indices 4..9 are kept: six links remain
// and only four are dropped. Confirm whether the trailing - 1 is intended.
227 int startIndex = Convert.ToInt32(Math.Ceiling(links.Count * percent)) - 1;
228 for (
int i = startIndex; i < links.Count; i++)
230 TLSingleLink link = links[i];
// A new TLSingleLink is constructed for each kept entry instead of adding
// the existing object.
231 remaining.Add(
new TLSingleLink(link.SourceArtifactId, link.TargetArtifactId, link.Score));
// Fragment (enclosing signature not visible in this excerpt): forwards
// matrix.AllLinks and percent to a RemoveBottomPercentage overload.
244 return RemoveBottomPercentage(matrix.AllLinks, percent);
// Fragment (signature, the body of the range check, any ordering step and
// the return are not visible in this excerpt): copies the head of links, up
// to a computed index, into a new list as fresh TLSingleLink objects.
// Range check: percent must lie strictly between 0 and 1. The action taken
// on failure is not visible - presumably an exception; TODO confirm.
255 if (percent <= 0.0 || percent >= 1.0)
259 TLLinksList remaining =
new TLLinksList();
// NOTE(review): possible off-by-one. For links.Count = 10 and percent = 0.5
// this gives Floor(5) - 1 = 4, and the loop below stops before index 4, so
// only four links are kept and six are dropped. Confirm whether the
// trailing - 1 combined with the strict comparison is intended.
261 int endIndex = Convert.ToInt32(Math.Floor(links.Count * (1 - percent))) - 1;
262 for (
int i = 0; i < endIndex; i++)
264 TLSingleLink link = links[i];
// A new TLSingleLink is constructed for each kept entry instead of adding
// the existing object.
265 remaining.Add(
new TLSingleLink(link.SourceArtifactId, link.TargetArtifactId, link.Score));
// Matrix overload: forwards original.AllLinks, artifactIDs and
// ignoreParameters to the TLLinksList overload of ExtractLinks and returns
// its result. (Braces are not visible in this excerpt.)
277 public static TLLinksList
ExtractLinks(TLSimilarityMatrix original, IEnumerable<string> artifactIDs,
bool ignoreParameters)
279 return ExtractLinks(original.AllLinks, artifactIDs, ignoreParameters);
// Selects the links of original whose source id or target id appears in
// artifactIDs and returns them as the AllLinks of a temporary matrix.
// When ignoreParameters is true, ids are compared after cutting everything
// from the first open parenthesis onward; the links that are kept still carry
// their original, uncut ids. (Braces are not visible in this excerpt.)
289 public static TLLinksList
ExtractLinks(TLLinksList original, IEnumerable<string> artifactIDs,
bool ignoreParameters)
291 TLSimilarityMatrix matrix =
new TLSimilarityMatrix();
292 foreach (TLSingleLink link
in original)
// The index test is > 0, so an id that starts with an open parenthesis is
// left unchanged rather than reduced to an empty string.
294 string sourceID = (ignoreParameters && link.SourceArtifactId.IndexOf(
'(') > 0)
295 ? link.SourceArtifactId.Substring(0, link.SourceArtifactId.IndexOf(
'('))
296 : link.SourceArtifactId;
297 string targetID = (ignoreParameters && link.TargetArtifactId.IndexOf(
'(') > 0)
298 ? link.TargetArtifactId.Substring(0, link.TargetArtifactId.IndexOf(
'('))
299 : link.TargetArtifactId;
// NOTE(review): artifactIDs is an IEnumerable of string, so Contains here is
// presumably an extension method (the relevant using directive is not
// visible). It is evaluated up to twice per link, which may be a linear scan
// each time unless callers pass a set - confirm.
300 if (artifactIDs.Contains(sourceID) || artifactIDs.Contains(targetID))
// The link is stored with its original ids, not the shortened ones.
302 matrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
305 return matrix.AllLinks;
// Fragment (enclosing signature not visible in this excerpt): forwards
// matrix.AllLinks to a GetSetOfTargetArtifacts overload and returns its result.
315 return GetSetOfTargetArtifacts(matrix.AllLinks);
// Fragment (signature and return not visible in this excerpt): gathers the
// TargetArtifactId of every link in links into a HashSet, so each distinct
// target id is stored once.
325 HashSet<string> artifacts =
new HashSet<string>();
326 foreach (TLSingleLink link
in links)
328 artifacts.Add(link.TargetArtifactId);
// Fragment (signature, braces and any else keyword are not visible in this
// excerpt): merges links that share a source id and whose target ids are
// equal once everything from the first open parenthesis onward is cut off.
// For each such pair the highest score is kept, and the result is written
// into a new TLSimilarityMatrix.
// Intermediate store: source id -> (shortened target id -> best score so far).
340 Dictionary<string, Dictionary<string, double>> pseudomatrix =
new Dictionary<string, Dictionary<string, double>>();
341 foreach (TLSingleLink link
in matrix.AllLinks)
// Create the inner dictionary the first time a source id is seen.
343 if (!pseudomatrix.ContainsKey(link.SourceArtifactId))
345 pseudomatrix.Add(link.SourceArtifactId,
new Dictionary<string,double>());
// Shorten the target id; the test is > 0, so an id that starts with an
// open parenthesis is left unchanged.
347 int startIndex = link.TargetArtifactId.IndexOf(
'(');
348 string target = (startIndex > 0)
349 ? link.TargetArtifactId.Substring(0, startIndex)
350 : link.TargetArtifactId;
// First occurrence of this source/target pair: record its score.
351 if (!pseudomatrix[link.SourceArtifactId].ContainsKey(target))
353 pseudomatrix[link.SourceArtifactId].Add(target, link.Score);
// Later occurrences replace the stored score only when strictly higher.
357 if (link.Score > pseudomatrix[link.SourceArtifactId][target])
359 pseudomatrix[link.SourceArtifactId][target] = link.Score;
// Write every stored source/target/score triple into the result matrix.
363 TLSimilarityMatrix collapsedMatrix =
new TLSimilarityMatrix();
364 foreach (
string sourceID
in pseudomatrix.Keys)
366 foreach (
string targetID
in pseudomatrix[sourceID].Keys)
368 collapsedMatrix.AddLink(sourceID, targetID, pseudomatrix[sourceID][targetID]);
371 return collapsedMatrix;