TraceLab Component Library
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Properties
HITSScript.cs
Go to the documentation of this file.
1 // TraceLab Component Library
2 // Copyright © 2012-2013 SEMERU
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 using System;
18 using System.Collections.Generic;
19 using System.IO;
20 using System.Linq;
21 using System.Reflection;
22 using RPlugin.Core;
23 using RPlugin.Exceptions;
24 using TraceLab.Components.DevelopmentKit.IO;
25 using TraceLab.Components.RPlugin.Properties;
26 using TraceLab.Components.Types.Preprocessors.ExecutionTraces;
27 using TraceLab.Components.Types.Tracers.WebMining;
28 
29 namespace TraceLab.Components.DevelopmentKit.Tracers.WebMining
30 {
/// <summary>
/// R script wrapper around <c>HITS.R</c> for a single trace.
/// <see cref="PreCompute"/> writes the PDG adjacency matrix and a row-to-method-name map
/// to cache files and stores the script arguments; <see cref="ImportResults"/> reads the
/// hub and authority scores written to the reserved cache files into a <see cref="HITSResult"/>.
/// </summary>
public class HITSScript : RScript
{
    private readonly string _baseScript = Settings.Default.Resources + "HITS.R";
    private readonly string[] _requiredPackages = new string[] { "base" };

    // Set once by the constructor.
    private readonly PDG _pdg;
    private readonly HITSConfig _config;
    private readonly string _traceID;

    // Cache file paths; populated by PreCompute() and read by ImportResults().
    private string _mappingFile;
    private string _authorityFile;
    private string _hubFile;

    /// <summary>
    /// Gets the name of the R script resource (the configured resources prefix followed by "HITS.R").
    /// </summary>
    public override string BaseScript
    {
        get { return _baseScript; }
    }

    /// <summary>
    /// Gets the R packages this script declares as required.
    /// </summary>
    public override string[] RequiredPackages
    {
        get { return _requiredPackages; }
    }

    /// <summary>
    /// Creates a script instance for one trace.
    /// </summary>
    /// <param name="traceID">Trace identifier; used in cache file names and as the first argument of every result link.</param>
    /// <param name="pdg">Graph whose nodes and outgoing edges define the adjacency matrix.</param>
    /// <param name="config">Supplies the edge weighting scheme and the epsilon passed to the script.</param>
    public HITSScript(string traceID, PDG pdg, HITSConfig config) : base()
    {
        _pdg = pdg;
        _config = config;
        _traceID = traceID;
    }

    /// <summary>
    /// Registers the base script, writes the adjacency matrix, reserves the output cache
    /// files and stores the script arguments in this order: matrix file, authority file,
    /// hub file, epsilon.
    /// </summary>
    /// <exception cref="RDataException">Propagated from the adjacency matrix generation.</exception>
    public override void PreCompute()
    {
        RUtil.RegisterScript(Assembly.GetExecutingAssembly(), _baseScript);
        string matrixFile = GenerateAdjacencyMatrix();
        _authorityFile = RUtil.ReserveCacheFile("HITS." + _traceID + ".authorities");
        _hubFile = RUtil.ReserveCacheFile("HITS." + _traceID + ".hubs");
        _arguments = new List<object>
        {
            matrixFile,
            _authorityFile,
            _hubFile,
            _config.Epsilon
        };
    }

    /// <summary>
    /// Reads the hub and authority scores and pairs them, by position, with the method
    /// names recorded in the mapping file.
    /// </summary>
    /// <param name="result">Result of the script run; not read by this implementation.</param>
    /// <returns>A <see cref="HITSResult"/> holding one hub link and one authority link per mapped method.</returns>
    public override object ImportResults(RScriptResult result)
    {
        // Materialize each sequence once so the loop below indexes in O(1) and the
        // underlying sources are enumerated a single time.
        List<double> authorities = new List<double>(Generics.ImportDoubles(_authorityFile, false));
        List<double> hubs = new List<double>(Generics.ImportDoubles(_hubFile, false));
        List<string> map = new List<string>(Generics.ImportStrings(_mappingFile));

        HITSResult results = new HITSResult();
        for (int i = 0; i < map.Count; i++)
        {
            string methodName = map[i];
            results.Hubs.AddLink(_traceID, methodName, hubs[i]);
            results.Authorities.AddLink(_traceID, methodName, authorities[i]);
        }
        return results;
    }

    #region Private methods

    /// <summary>
    /// Writes one space-separated matrix row per PDG node (column = index of the edge's
    /// target node, value = 1 for binary weighting or the edge weight for frequency
    /// weighting) and, in parallel, one method name per line to the mapping file.
    /// Also sets <c>_mappingFile</c>.
    /// </summary>
    /// <returns>Path of the matrix cache file.</returns>
    /// <exception cref="RDataException">
    /// An edge points to a node the PDG cannot index, or the configured weighting scheme is unknown.
    /// </exception>
    private string GenerateAdjacencyMatrix()
    {
        int n = _pdg.Nodes.Count();
        double[] rowValues = new double[n];
        string matrixFile;

        // 'using' guarantees both cache files are closed even when an RDataException
        // aborts the loop; disposing a StreamWriter flushes it and closes its stream.
        using (FileStream matrixFS = RUtil.CreateCacheFile("HITS." + _traceID + ".TPM.matrix"))
        using (TextWriter matrixWriter = new StreamWriter(matrixFS))
        using (FileStream mapFS = RUtil.CreateCacheFile("HITS." + _traceID + ".TPM.map"))
        using (TextWriter mapWriter = new StreamWriter(mapFS))
        {
            matrixFile = matrixFS.Name;
            _mappingFile = mapFS.Name;

            for (int nodeIndex = 0; nodeIndex < n; nodeIndex++)
            {
                PDGNode pdgNode = _pdg.GetNode(nodeIndex);

                // The row buffer is reused across nodes, so reset it first.
                Array.Clear(rowValues, 0, n);

                foreach (PDGEdge pdgOutgoingEdge in pdgNode.OutgoingEdges)
                {
                    int column = _pdg.IndexOf(pdgOutgoingEdge.OutgoingNodeID);
                    if (column < 0)
                    {
                        throw new RDataException("Edge target not found in PDG: " + pdgOutgoingEdge.OutgoingNodeID);
                    }

                    if (_config.Weight == WebMiningWeightEnum.Binary)
                    {
                        rowValues[column] = 1;
                    }
                    else if (_config.Weight == WebMiningWeightEnum.Frequency)
                    {
                        rowValues[column] = pdgOutgoingEdge.Weight;
                    }
                    else
                    {
                        throw new RDataException("Unknown weighting scheme: " + _config.Weight);
                    }
                }

                matrixWriter.WriteLine(FormatRow(rowValues));
                mapWriter.WriteLine(pdgNode.MethodName);
            }
        }

        return matrixFile;
    }

    /// <summary>
    /// Joins the values with single spaces using the invariant culture, so the decimal
    /// separator is always '.' regardless of the machine's regional settings.
    /// </summary>
    private static string FormatRow(double[] values)
    {
        string[] cells = new string[values.Length];
        for (int i = 0; i < values.Length; i++)
        {
            cells[i] = values[i].ToString(System.Globalization.CultureInfo.InvariantCulture);
        }
        return String.Join(" ", cells);
    }

    #endregion
}
191 }