TraceLab Component Library
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Properties
TLSimilarityMatrixUtil.cs
Go to the documentation of this file.
1 // TraceLab Component Library
2 // Copyright © 2012-2013 SEMERU
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 using System;
18 using System.Collections.Generic;
19 using System.Linq;
20 using System.Text;
21 using TraceLabSDK.Types;
22 
23 namespace TraceLab.Components.DevelopmentKit.Utils
24 {
28  public static class TLSimilarityMatrixUtil
29  {
30  #region Simple statistics
31 
/// <summary>
/// Computes the mean link score of a similarity matrix.
/// </summary>
/// <param name="matrix">Matrix whose <c>AllLinks</c> collection is averaged.</param>
/// <returns>The result of the <c>TLLinksList</c> overload applied to <c>matrix.AllLinks</c>.</returns>
public static double AverageSimilarity(TLSimilarityMatrix matrix)
{
    TLLinksList allLinks = matrix.AllLinks;
    return AverageSimilarity(allLinks);
}
41 
/// <summary>
/// Computes the arithmetic mean of the scores of all links in a list.
/// </summary>
/// <param name="list">Links whose scores are averaged.</param>
/// <returns>
/// Sum of all scores divided by <c>list.Count</c>. There is no special case
/// for an empty list: the (zero) sum is then divided by a zero count.
/// </returns>
public static double AverageSimilarity(TLLinksList list)
{
    double total = 0;

    foreach (TLSingleLink entry in list)
    {
        total = total + entry.Score;
    }

    double mean = total / list.Count;
    return mean;
}
58 
/// <summary>
/// Computes the standard deviation of the link scores of a similarity matrix.
/// </summary>
/// <param name="matrix">Matrix whose <c>AllLinks</c> collection is analysed.</param>
/// <returns>The result of the <c>TLLinksList</c> overload applied to <c>matrix.AllLinks</c>.</returns>
public static double SimilarityStandardDeviation(TLSimilarityMatrix matrix)
{
    TLLinksList allLinks = matrix.AllLinks;
    return SimilarityStandardDeviation(allLinks);
}
68 
/// <summary>
/// Computes the population standard deviation of the scores of all links in a list.
/// </summary>
/// <remarks>
/// The deviation of every score from the mean is squared and accumulated.
/// This replaces the "mean of squares minus square of mean" shortcut, whose
/// subtraction can round to a slightly negative number when the scores are
/// nearly identical, which would make <see cref="Math.Sqrt"/> return NaN.
/// </remarks>
/// <param name="list">Links whose scores are analysed.</param>
/// <returns>
/// Square root of the mean squared deviation from <c>AverageSimilarity(list)</c>.
/// There is no special case for an empty list: the division by a zero count
/// is performed as-is.
/// </returns>
public static double SimilarityStandardDeviation(TLLinksList list)
{
    double average = AverageSimilarity(list);
    double sumOfSquaredDeviations = 0;

    foreach (TLSingleLink link in list)
    {
        double deviation = link.Score - average;
        // A square is never negative, so the radicand below cannot drop under zero.
        sumOfSquaredDeviations += deviation * deviation;
    }

    return Math.Sqrt(sumOfSquaredDeviations / list.Count);
}
87 
88  #endregion
89 
90  #region Constructor
91 
/// <summary>
/// Builds a new similarity matrix from a list of links.
/// </summary>
/// <param name="list">Links to copy into the matrix.</param>
/// <returns>
/// A freshly constructed <c>TLSimilarityMatrix</c> that received one
/// <c>AddLink</c> call (source id, target id, score) per link in <paramref name="list"/>.
/// </returns>
public static TLSimilarityMatrix CreateMatrix(TLLinksList list)
{
    TLSimilarityMatrix result = new TLSimilarityMatrix();

    foreach (TLSingleLink entry in list)
    {
        result.AddLink(
            entry.SourceArtifactId,
            entry.TargetArtifactId,
            entry.Score);
    }

    return result;
}
106 
107  #endregion
108 
109  #region Link pruning
110 
/// <summary>
/// Collects the links of a matrix whose score is strictly greater than the
/// matrix's own <c>Threshold</c>.
/// </summary>
/// <param name="matrix">Matrix supplying both the links and the threshold.</param>
/// <returns>A new list holding the qualifying links, in the order <c>AllLinks</c> yields them.</returns>
public static TLLinksList GetLinksAboveThreshold(TLSimilarityMatrix matrix)
{
    // Same filter as the two-argument overload, with the matrix's own cut-off.
    return GetLinksAboveThreshold(matrix, matrix.Threshold);
}
127 
/// <summary>
/// Collects the links of a matrix whose score is strictly greater than a
/// caller-supplied threshold.
/// </summary>
/// <param name="matrix">Matrix whose <c>AllLinks</c> are filtered.</param>
/// <param name="threshold">Exclusive lower bound for a link's score.</param>
/// <returns>A new list holding the qualifying links, in the order <c>AllLinks</c> yields them.</returns>
public static TLLinksList GetLinksAboveThreshold(TLSimilarityMatrix matrix, double threshold)
{
    TLLinksList selected = new TLLinksList();

    foreach (TLSingleLink candidate in matrix.AllLinks)
    {
        // Written as a negated '>' so the comparison is exactly the original one.
        bool aboveThreshold = candidate.Score > threshold;
        if (!aboveThreshold)
        {
            continue;
        }

        selected.Add(candidate);
    }

    return selected;
}
144 
/// <summary>
/// Returns the first <paramref name="topN"/> links of a matrix after sorting
/// its link list.
/// </summary>
/// <remarks>
/// <c>matrix.AllLinks</c> is read once and reused for the size check, the
/// error message and the sort instead of being evaluated for each of them.
/// The ordering is whatever <c>TLLinksList.Sort()</c> produces; presumably
/// best score first - confirm against the TLSingleLink comparison.
/// </remarks>
/// <param name="matrix">Matrix supplying the links.</param>
/// <param name="topN">Number of links to return; must be at least 1 and no larger than the number of links.</param>
/// <returns>A new list containing the first <paramref name="topN"/> links of the sorted list.</returns>
/// <exception cref="DevelopmentKitException">
/// The matrix has fewer than <paramref name="topN"/> links, or <paramref name="topN"/> is less than 1.
/// </exception>
public static TLLinksList GetTopNLinks(TLSimilarityMatrix matrix, int topN)
{
    TLLinksList links = matrix.AllLinks;

    if (links.Count < topN)
    {
        throw new DevelopmentKitException("Matrix only has " + links.Count + " links (" + topN + " requested).");
    }
    if (topN < 1)
    {
        throw new DevelopmentKitException("topN must be greater than 0.");
    }

    links.Sort();

    TLLinksList newLinks = new TLLinksList();
    for (int i = 0; i < topN; i++)
    {
        newLinks.Add(links[i]);
    }
    return newLinks;
}
170 
/// <summary>
/// Returns the leading links of the sorted link list, up to and including the
/// link at which the requested share of answer-matrix links has been found.
/// </summary>
/// <remarks>
/// Links are taken in sorted order. Each one that
/// <c>answerMatrix.IsLinkAboveThreshold</c> accepts counts as correct, and the
/// scan stops once the count reaches <c>answerMatrix.Count * level</c> or the
/// links run out. The list is walked by index rather than by repeatedly
/// removing its first element, so the selected links are the same while the
/// list is left intact and no element shifting happens on every step.
/// </remarks>
/// <param name="matrix">Candidate links to select from.</param>
/// <param name="answerMatrix">Matrix used to decide whether a candidate link is correct.</param>
/// <param name="level">Recall level; must be greater than 0 and at most 1.</param>
/// <returns>A new list with every link visited before the scan stopped, in sorted order.</returns>
/// <exception cref="DevelopmentKitException"><paramref name="level"/> is outside (0, 1].</exception>
public static TLLinksList GetLinksAtRecall(TLSimilarityMatrix matrix, TLSimilarityMatrix answerMatrix, double level)
{
    if (level <= 0.0 || level > 1.0)
    {
        throw new DevelopmentKitException("Recall level must be between 0 and 1.");
    }

    double totalCorrect = answerMatrix.Count * level;
    int numCorrect = 0;

    TLLinksList links = matrix.AllLinks;
    links.Sort();

    TLLinksList newLinks = new TLLinksList();
    for (int i = 0; i < links.Count && numCorrect < totalCorrect; i++)
    {
        TLSingleLink link = links[i];
        if (answerMatrix.IsLinkAboveThreshold(link.SourceArtifactId, link.TargetArtifactId))
        {
            numCorrect++;
        }
        // Incorrect links are kept as well: the result is a prefix of the ranking.
        newLinks.Add(link);
    }
    return newLinks;
}
201 
/// <summary>
/// Drops the leading share of a matrix's sorted links.
/// </summary>
/// <param name="matrix">Matrix whose <c>AllLinks</c> are pruned.</param>
/// <param name="percent">Fraction of links to drop, passed through to the <c>TLLinksList</c> overload.</param>
/// <returns>The result of the <c>TLLinksList</c> overload applied to <c>matrix.AllLinks</c>.</returns>
public static TLLinksList RemoveTopPercentage(TLSimilarityMatrix matrix, double percent)
{
    TLLinksList allLinks = matrix.AllLinks;
    return RemoveTopPercentage(allLinks, percent);
}
212 
/// <summary>
/// Returns copies of the links that remain after dropping the leading
/// <paramref name="percent"/> share of the sorted list.
/// </summary>
/// <remarks>
/// The number of dropped links is <c>ceil(links.Count * percent)</c>, so the
/// first kept link sits at exactly that index. Subtracting one from it, as was
/// done before, left one link of the top slice in the result (for example 9 of
/// 10 links kept at 20%).
/// Note that <paramref name="links"/> itself is sorted in place.
/// The ordering is whatever <c>TLLinksList.Sort()</c> produces; presumably
/// best score first - confirm against the TLSingleLink comparison.
/// </remarks>
/// <param name="links">Links to prune; sorted in place by this call.</param>
/// <param name="percent">Fraction of links to drop; must be strictly between 0 and 1.</param>
/// <returns>A new list of new <c>TLSingleLink</c> objects for the kept links.</returns>
/// <exception cref="DevelopmentKitException"><paramref name="percent"/> is not strictly between 0 and 1.</exception>
public static TLLinksList RemoveTopPercentage(TLLinksList links, double percent)
{
    if (percent <= 0.0 || percent >= 1.0)
    {
        throw new DevelopmentKitException("Percentage level must be between 0 and 1.");
    }

    TLLinksList remaining = new TLLinksList();
    links.Sort();

    // Indices [0, removeCount) form the removed top slice.
    int removeCount = Convert.ToInt32(Math.Ceiling(links.Count * percent));
    for (int i = removeCount; i < links.Count; i++)
    {
        TLSingleLink link = links[i];
        remaining.Add(new TLSingleLink(link.SourceArtifactId, link.TargetArtifactId, link.Score));
    }
    return remaining;
}
235 
/// <summary>
/// Drops the trailing share of a matrix's sorted links.
/// </summary>
/// <param name="matrix">Matrix whose <c>AllLinks</c> are pruned.</param>
/// <param name="percent">Fraction of links to drop, passed through to the <c>TLLinksList</c> overload.</param>
/// <returns>The result of the <c>TLLinksList</c> overload applied to <c>matrix.AllLinks</c>.</returns>
public static TLLinksList RemoveBottomPercentage(TLSimilarityMatrix matrix, double percent)
{
    TLLinksList allLinks = matrix.AllLinks;
    return RemoveBottomPercentage(allLinks, percent);
}
246 
/// <summary>
/// Returns copies of the links that remain after dropping the trailing
/// <paramref name="percent"/> share of the sorted list.
/// </summary>
/// <remarks>
/// The number of kept links is <c>floor(links.Count * (1 - percent))</c>, and
/// the loop runs over exactly that many leading elements. Previously the bound
/// was reduced by one and still compared with <c>&lt;</c>, which dropped one
/// link too many (for example 7 of 10 links kept at 20%).
/// Note that <paramref name="links"/> itself is sorted in place.
/// The ordering is whatever <c>TLLinksList.Sort()</c> produces; presumably
/// best score first - confirm against the TLSingleLink comparison.
/// </remarks>
/// <param name="links">Links to prune; sorted in place by this call.</param>
/// <param name="percent">Fraction of links to drop; must be strictly between 0 and 1.</param>
/// <returns>A new list of new <c>TLSingleLink</c> objects for the kept links.</returns>
/// <exception cref="DevelopmentKitException"><paramref name="percent"/> is not strictly between 0 and 1.</exception>
public static TLLinksList RemoveBottomPercentage(TLLinksList links, double percent)
{
    if (percent <= 0.0 || percent >= 1.0)
    {
        throw new DevelopmentKitException("Percentage level must be between 0 and 1.");
    }

    TLLinksList remaining = new TLLinksList();
    links.Sort();

    // Indices [0, keepCount) are kept; everything after is the removed bottom slice.
    int keepCount = Convert.ToInt32(Math.Floor(links.Count * (1 - percent)));
    for (int i = 0; i < keepCount; i++)
    {
        TLSingleLink link = links[i];
        remaining.Add(new TLSingleLink(link.SourceArtifactId, link.TargetArtifactId, link.Score));
    }
    return remaining;
}
269 
/// <summary>
/// Extracts the links of a matrix that involve any of the given artifact ids.
/// </summary>
/// <param name="original">Matrix whose <c>AllLinks</c> are filtered.</param>
/// <param name="artifactIDs">Artifact ids to look for.</param>
/// <param name="ignoreParameters">Passed through to the <c>TLLinksList</c> overload.</param>
/// <returns>The result of the <c>TLLinksList</c> overload applied to <c>original.AllLinks</c>.</returns>
public static TLLinksList ExtractLinks(TLSimilarityMatrix original, IEnumerable<string> artifactIDs, bool ignoreParameters)
{
    TLLinksList allLinks = original.AllLinks;
    return ExtractLinks(allLinks, artifactIDs, ignoreParameters);
}
281 
/// <summary>
/// Extracts the links whose source or target artifact id is contained in
/// <paramref name="artifactIDs"/>.
/// </summary>
/// <remarks>
/// Matching links are added (with their original, unshortened ids) to a
/// temporary <c>TLSimilarityMatrix</c>, and that matrix's <c>AllLinks</c> is
/// returned.
/// Membership is tested against a set instead of scanning the id sequence
/// once or twice per link. If the caller already passed an
/// <c>ISet&lt;string&gt;</c> it is used directly, so its own comparer still
/// decides equality.
/// </remarks>
/// <param name="original">Links to filter.</param>
/// <param name="artifactIDs">Artifact ids to look for.</param>
/// <param name="ignoreParameters">
/// When true, everything from the first '(' onwards is cut off a link's ids
/// before they are compared, provided the '(' is not the first character.
/// </param>
/// <returns>The <c>AllLinks</c> of a matrix built from the matching links.</returns>
public static TLLinksList ExtractLinks(TLLinksList original, IEnumerable<string> artifactIDs, bool ignoreParameters)
{
    // Created on the first link so that an empty link list never touches artifactIDs.
    ISet<string> lookup = null;

    TLSimilarityMatrix matrix = new TLSimilarityMatrix();
    foreach (TLSingleLink link in original)
    {
        if (lookup == null)
        {
            lookup = artifactIDs as ISet<string> ?? new HashSet<string>(artifactIDs);
        }

        string sourceID = ignoreParameters
            ? StripParameterList(link.SourceArtifactId)
            : link.SourceArtifactId;
        string targetID = ignoreParameters
            ? StripParameterList(link.TargetArtifactId)
            : link.TargetArtifactId;

        if (lookup.Contains(sourceID) || lookup.Contains(targetID))
        {
            matrix.AddLink(link.SourceArtifactId, link.TargetArtifactId, link.Score);
        }
    }
    return matrix.AllLinks;
}

/// <summary>
/// Cuts an artifact id at its first '(' unless that character is missing or leads the id.
/// </summary>
private static string StripParameterList(string artifactId)
{
    int parenIndex = artifactId.IndexOf('(');
    return (parenIndex > 0)
        ? artifactId.Substring(0, parenIndex)
        : artifactId;
}
307 
/// <summary>
/// Collects the distinct target artifact ids of a matrix's links.
/// </summary>
/// <param name="matrix">Matrix whose <c>AllLinks</c> are inspected.</param>
/// <returns>The result of the <c>TLLinksList</c> overload applied to <c>matrix.AllLinks</c>.</returns>
public static ISet<string> GetSetOfTargetArtifacts(TLSimilarityMatrix matrix)
{
    TLLinksList allLinks = matrix.AllLinks;
    return GetSetOfTargetArtifacts(allLinks);
}
317 
/// <summary>
/// Collects the distinct target artifact ids of a list of links.
/// </summary>
/// <param name="links">Links whose <c>TargetArtifactId</c> values are gathered.</param>
/// <returns>A new <c>HashSet&lt;string&gt;</c> containing every target id once.</returns>
public static ISet<string> GetSetOfTargetArtifacts(TLLinksList links)
{
    HashSet<string> targetIds = new HashSet<string>();

    foreach (TLSingleLink entry in links)
    {
        string targetId = entry.TargetArtifactId;
        // Add() ignores ids that are already present, which is what deduplicates.
        targetIds.Add(targetId);
    }

    return targetIds;
}
332 
/// <summary>
/// Merges links whose target ids differ only in a trailing parameter list,
/// keeping the highest score per (source, shortened target) pair.
/// </summary>
/// <remarks>
/// A target id is shortened to the text before its first '(' unless the '('
/// is missing or is the first character. Dictionary access uses
/// <c>TryGetValue</c> so each key is looked up once per step instead of once
/// for <c>ContainsKey</c> and again for the indexer.
/// NOTE(review): only links are copied into the returned matrix; nothing else
/// from <paramref name="matrix"/> (e.g. its threshold) is transferred here -
/// confirm that callers do not expect it.
/// </remarks>
/// <param name="matrix">Matrix whose <c>AllLinks</c> are collapsed.</param>
/// <returns>A new matrix with one link per source and shortened target id.</returns>
public static TLSimilarityMatrix CollapseOverloadedTargets(TLSimilarityMatrix matrix)
{
    // source id -> (shortened target id -> best score seen so far)
    Dictionary<string, Dictionary<string, double>> bestScores = new Dictionary<string, Dictionary<string, double>>();

    foreach (TLSingleLink link in matrix.AllLinks)
    {
        Dictionary<string, double> targets;
        if (!bestScores.TryGetValue(link.SourceArtifactId, out targets))
        {
            targets = new Dictionary<string, double>();
            bestScores.Add(link.SourceArtifactId, targets);
        }

        int startIndex = link.TargetArtifactId.IndexOf('(');
        string target = (startIndex > 0)
            ? link.TargetArtifactId.Substring(0, startIndex)
            : link.TargetArtifactId;

        double currentBest;
        if (!targets.TryGetValue(target, out currentBest) || link.Score > currentBest)
        {
            targets[target] = link.Score;
        }
    }

    TLSimilarityMatrix collapsedMatrix = new TLSimilarityMatrix();
    foreach (KeyValuePair<string, Dictionary<string, double>> sourceEntry in bestScores)
    {
        foreach (KeyValuePair<string, double> targetEntry in sourceEntry.Value)
        {
            collapsedMatrix.AddLink(sourceEntry.Key, targetEntry.Key, targetEntry.Value);
        }
    }
    return collapsedMatrix;
}
373 
374  #endregion
375  }
376 }