TraceLab Component Library
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Properties
PageRankScript.cs
Go to the documentation of this file.
1 // TraceLab Component Library
2 // Copyright © 2012-2013 SEMERU
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 using System;
18 using System.Collections.Generic;
19 using System.IO;
20 using System.Linq;
21 using System.Reflection;
22 using RPlugin.Core;
23 using RPlugin.Exceptions;
24 using TraceLab.Components.DevelopmentKit.IO;
25 using TraceLab.Components.RPlugin.Properties;
26 using TraceLab.Components.Types.Preprocessors.ExecutionTraces;
27 using TraceLab.Components.Types.Tracers.WebMining;
28 using TraceLabSDK.Types;
29 
30 namespace TraceLab.Components.DevelopmentKit.Tracers.WebMining
31 {
35  public class PageRankScript : RScript
36  {
37  private readonly string _baseScript = Settings.Default.Resources + "PageRank.R";
38  private readonly string[] _requiredPackages = new string[] { "base" };
39 
40  private PDG _pdg;
41  private string _mappingFile;
42  private string _outputFile;
43  private PageRankConfig _config;
44  private string _traceID;
45 
49  public override string BaseScript
50  {
51  get
52  {
53  return _baseScript;
54  }
55  }
56 
60  public override string[] RequiredPackages
61  {
62  get
63  {
64  return _requiredPackages;
65  }
66  }
67 
74  public PageRankScript(string traceID, PDG pdg, PageRankConfig config) : base()
75  {
76  _pdg = pdg;
77  _config = config;
78  _traceID = traceID;
79  }
80 
84  public override void PreCompute()
85  {
86  RUtil.RegisterScript(Assembly.GetExecutingAssembly(), _baseScript);
87 
88  switch (_config.Weight)
89  {
90  case WebMiningWeightEnum.Binary:
91  NormalizeEdgeBinary();
92  break;
93  case WebMiningWeightEnum.Frequency:
94  NormalizeEdgeFrequencies();
95  break;
96  default:
97  throw new RExecutionException("Unknown weighting scheme: " + _config.Weight);
98  }
99 
100  string matrixFile = GenerateTransitionProbabilityMatrix();
101  _outputFile = RUtil.ReserveCacheFile("PageRank" + _traceID + ".out");
102  _arguments = new List<object>();
103  _arguments.Add(matrixFile);
104  _arguments.Add(_outputFile);
105  _arguments.Add(_config.Beta);
106  _arguments.Add(_config.Epsilon);
107  }
108 
114  public override object ImportResults(RScriptResult result)
115  {
116  IEnumerable<double> ranks = Generics.ImportDoubles(_outputFile, false);
117  IEnumerable<string> map = Generics.ImportStrings(_mappingFile);
118  if (ranks.Count() != map.Count())
119  {
120  throw new RDataException("Results file in incorrect format: incorrect number of entries");
121  }
122  TLSimilarityMatrix rankList = new TLSimilarityMatrix();
123  for (int i = 0; i < map.Count(); i++)
124  {
125  rankList.AddLink(_traceID, map.ElementAt(i), ranks.ElementAt(i));
126  }
127  return rankList;
128  }
129 
130  #region Private methods
131 
132  private void NormalizeEdgeFrequencies()
133  {
134  // nodes
135  foreach (PDGNode node in _pdg.Nodes)
136  {
137  double sumNodeFrequencies = 0.0;
138  // node outgoing edges
139  foreach (PDGEdge edge in node.OutgoingEdges)
140  {
141  sumNodeFrequencies += edge.Weight;
142  }
143  foreach (PDGEdge edge in node.OutgoingEdges)
144  {
145  edge.Weight = edge.Weight / sumNodeFrequencies;
146  }
147  }
148  }
149 
150  private void NormalizeEdgeBinary()
151  {
152  // nodes
153  foreach (PDGNode node in _pdg.Nodes)
154  {
155  double binaryWeight = node.OutgoingEdges.Count();
156  if (!binaryWeight.Equals(0.0))
157  {
158  binaryWeight = 1.0 / binaryWeight;
159  }
160  foreach (PDGEdge edge in _pdg.GetNode(node.MethodName).OutgoingEdges)
161  {
162  edge.Weight = binaryWeight;
163  }
164  }
165  }
166 
167  private string GenerateTransitionProbabilityMatrix()
168  {
169  int n = _pdg.Nodes.Count();
170  double defaultValue = 1.0 / n;
171  double[] rowValues = new double[n];
172 
173  FileStream matrixFS = RUtil.CreateCacheFile("PageRank." + _traceID + ".TPM.matrix");
174  TextWriter matrixWriter = new StreamWriter(matrixFS);
175 
176  FileStream edgeFS = RUtil.CreateCacheFile("PageRank." + _traceID + ".TPM.edges");
177  TextWriter edgeWriter = new StreamWriter(edgeFS);
178 
179  FileStream mapFS = RUtil.CreateCacheFile("PageRank." + _traceID + ".TPM.map");
180  _mappingFile = mapFS.Name;
181  TextWriter mapWriter = new StreamWriter(mapFS);
182 
183  for (int nodeIndex = 0; nodeIndex < _pdg.Nodes.Count(); nodeIndex++)
184  {
185  PDGNode pdgNode = _pdg.GetNode(nodeIndex);
186 
187  if (pdgNode.OutgoingEdges.Count() == 0)
188  {
189  for (int i = 0; i < n; i++)
190  {
191  rowValues[i] = defaultValue;
192  }
193  }
194  else
195  {
196  for (int i = 0; i < n; i++)
197  {
198  rowValues[i] = 0.0;
199  }
200  }
201 
202  edgeWriter.WriteLine(pdgNode.OutgoingEdges.Count()); // write number of outgoing edges for most of the advanced PageRank algorithms
203 
204  for (int indexOutgoingEdge = 0; indexOutgoingEdge < pdgNode.OutgoingEdges.Count(); indexOutgoingEdge++)
205  {
206  PDGEdge pdgOutgoingEdge = pdgNode.OutgoingEdges.ElementAt(indexOutgoingEdge);
207  int columnFrequencies = _pdg.IndexOf(pdgOutgoingEdge.OutgoingNodeID);
208  // for positive values only
209  if (columnFrequencies == -1)
210  {
211  throw new RDataException("Invalid column index.");
212  // continue;
213  }
214  rowValues[columnFrequencies] = pdgOutgoingEdge.Weight;
215  }
216 
217  //for (int i=1;i<=n;i++)
218  //{
219  // matrixWriter.Write(rowValues[i]+" ");
220  //}
221  matrixWriter.WriteLine(String.Join(" ", rowValues));
222  mapWriter.WriteLine(pdgNode.MethodName);
223  }
224 
225  matrixWriter.Flush();
226  matrixWriter.Close();
227  edgeWriter.Flush();
228  edgeWriter.Close();
229  mapWriter.Flush();
230  mapWriter.Close();
231  return matrixFS.Name;
232  }
233 
234  #endregion
235  }
236 }