TraceLab Component Library
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Properties
LSAScript.cs
Go to the documentation of this file.
1 // TraceLab Component Library
2 // Copyright © 2012-2013 SEMERU
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 using RPlugin.Core;
18 using RPlugin.Exceptions;
19 using System;
20 using System.Collections.Generic;
21 using System.IO;
22 using System.Linq;
23 using System.Reflection;
24 using TraceLab.Components.DevelopmentKit.IO;
25 using TraceLab.Components.RPlugin.Properties;
26 using TraceLab.Components.Types.Tracers.InformationRetrieval;
27 using TraceLabSDK.Types;
28 
namespace TraceLab.Components.DevelopmentKit.Tracers.InformationRetrieval
{
    /// <summary>
    /// R script wrapper for LSA.R. It writes the source and target artifacts to
    /// cache files, passes their locations (plus the configured number of
    /// dimensions) to the script, and reads the script's output file back into a
    /// <see cref="TLSimilarityMatrix"/>.
    /// </summary>
    public class LSAScript : RScript
    {
        // The files exchanged with the R script are machine-written, so numbers are
        // formatted and parsed independently of the current thread culture.
        private static readonly IFormatProvider _invariant = System.Globalization.CultureInfo.InvariantCulture;

        private readonly string _baseScript = Settings.Default.Resources + "LSA.R";
        private readonly string[] _requiredPackages = new string[] { "lsa" };

        private readonly TLArtifactsCollection _source;
        private readonly TLArtifactsCollection _target;
        private readonly LSAConfig _config;
        private string _outputFile;
        private string _sourceFile;
        private string _targetFile;
        private string _mapFile;

        /// <summary>
        /// Gets the script identifier: <c>Settings.Default.Resources</c> followed by "LSA.R".
        /// </summary>
        public override string BaseScript
        {
            get
            {
                return _baseScript;
            }
        }

        /// <summary>
        /// Gets the R packages this script declares as required (only "lsa").
        /// </summary>
        public override string[] RequiredPackages
        {
            get
            {
                return _requiredPackages;
            }
        }

        /// <summary>
        /// Creates a new LSA script over the given artifact collections.
        /// </summary>
        /// <param name="source">Source artifacts.</param>
        /// <param name="target">Target artifacts.</param>
        /// <param name="config">Configuration; its <c>Dimensions</c> value is validated in <see cref="PreCompute"/>.</param>
        public LSAScript(TLArtifactsCollection source, TLArtifactsCollection target, LSAConfig config) : base()
        {
            _source = source;
            _target = target;
            _config = config;
        }

        /// <summary>
        /// Registers the script, validates the configuration, writes the corpus to the
        /// cache and builds the script argument list.
        /// </summary>
        /// <exception cref="RDataException">The configured number of dimensions is out of range.</exception>
        public override void PreCompute()
        {
            RUtil.RegisterScript(Assembly.GetExecutingAssembly(), _baseScript);
            CheckParameters();
            _outputFile = RUtil.ReserveCacheFile("LSA.out");
            DirectoryInfo corpusDir = SaveArtifactsToCache(_source, _target, "LSA.corpus");

            // Argument order: corpus directory, source index file, target index file,
            // output file, number of dimensions.
            _arguments = new List<object>
            {
                corpusDir.FullName,
                _sourceFile,
                _targetFile,
                _outputFile,
                _config.Dimensions
            };
        }

        /// <summary>
        /// Reads the output file written by the script and converts it into a similarity matrix.
        /// </summary>
        /// <param name="result">Script execution result (not used by this implementation).</param>
        /// <returns>A <see cref="TLSimilarityMatrix"/> holding one link per (source, target) pair in the output file.</returns>
        /// <exception cref="RDataException">The output file is empty, or a row has fewer columns than the header requires.</exception>
        public override object ImportResults(RScriptResult result)
        {
            // The numeric ids in the results file are the 1-based corpus file numbers
            // written by SaveArtifactsToCache, so index = id - 1 into the map entries.
            string[] ids = Generics.ImportStrings(_mapFile);
            TLSimilarityMatrix matrix = new TLSimilarityMatrix();

            // 'using' guarantees the reader is released even when parsing throws.
            using (TextReader resultsMatrix = new StreamReader(_outputFile))
            {
                string header = resultsMatrix.ReadLine();
                if (header == null)
                {
                    throw new RDataException("LSA results file is empty: " + _outputFile);
                }

                // Header row: numeric ids of the source documents.
                string[] sources = header.Split();
                string line;
                while ((line = resultsMatrix.ReadLine()) != null)
                {
                    if (String.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    // [0] target id, [i + 1] similarity to sources[i]
                    string[] entries = line.Split();
                    if (entries.Length < sources.Length + 1)
                    {
                        throw new RDataException("LSA results row has " + entries.Length
                            + " columns but " + (sources.Length + 1) + " were expected.");
                    }

                    string targetId = ids[Convert.ToInt32(entries[0], _invariant) - 1];
                    for (int i = 0; i < sources.Length; i++)
                    {
                        string sourceId = ids[Convert.ToInt32(sources[i], _invariant) - 1];
                        matrix.AddLink(sourceId, targetId, Convert.ToDouble(entries[i + 1], _invariant));
                    }
                }
            }

            return matrix;
        }

        /// <summary>
        /// Verifies that the configured number of dimensions lies between 1 and the
        /// total number of documents (source count plus target count).
        /// </summary>
        /// <exception cref="RDataException">The number of dimensions is out of range.</exception>
        private void CheckParameters()
        {
            if (_config.Dimensions < 1)
            {
                throw new RDataException("Dimensions (" + _config.Dimensions + ") cannot be less than 1.");
            }
            if (_config.Dimensions > _source.Count + _target.Count)
            {
                throw new RDataException("Dimensions (" + _config.Dimensions + ") cannot be greater than the number of documents (" + (_source.Count + _target.Count) + ").");
            }
        }

        /// <summary>
        /// Writes every artifact to its own numbered file in a cache directory and records
        /// which file numbers belong to the source set, which to the target set, and which
        /// artifact id each file number maps to.
        /// </summary>
        /// <param name="source">Source artifacts; they receive the first file numbers, starting at 1.</param>
        /// <param name="target">Target artifacts; numbered consecutively after the sources.</param>
        /// <param name="name">Name of the cache directory to create.</param>
        /// <returns>The cache directory containing the numbered corpus files.</returns>
        private DirectoryInfo SaveArtifactsToCache(TLArtifactsCollection source, TLArtifactsCollection target, string name)
        {
            DirectoryInfo infoDir = RUtil.CreateCacheDirectory(name);

            // Every stream and writer sits in a 'using' so a failed write cannot leave
            // cache files locked. Disposing a writer also closes its stream; the second
            // dispose of the stream is a no-op.
            using (FileStream sourceStream = RUtil.CreateCacheFile("LSA.corpus.source"))
            using (FileStream targetStream = RUtil.CreateCacheFile("LSA.corpus.target"))
            using (FileStream mapStream = RUtil.CreateCacheFile("LSA.corpus.map"))
            using (TextWriter sourceWriter = new StreamWriter(sourceStream))
            using (TextWriter targetWriter = new StreamWriter(targetStream))
            using (TextWriter mapWriter = new StreamWriter(mapStream))
            {
                _sourceFile = sourceStream.Name;
                _targetFile = targetStream.Name;
                _mapFile = mapStream.Name;

                int nextIndex = WriteArtifacts(source, infoDir, sourceWriter, mapWriter, 1);
                WriteArtifacts(target, infoDir, targetWriter, mapWriter, nextIndex);
            }

            return infoDir;
        }

        /// <summary>
        /// Writes each artifact's text to a file named after its running number, appends that
        /// number to <paramref name="indexWriter"/> and the artifact id to <paramref name="mapWriter"/>.
        /// </summary>
        /// <param name="artifacts">Artifacts to write.</param>
        /// <param name="corpusDir">Directory that receives the numbered files.</param>
        /// <param name="indexWriter">Receives one file number per line.</param>
        /// <param name="mapWriter">Receives one artifact id per line, in file-number order.</param>
        /// <param name="firstIndex">Number assigned to the first artifact.</param>
        /// <returns>The next unused file number.</returns>
        private static int WriteArtifacts(TLArtifactsCollection artifacts, DirectoryInfo corpusDir, TextWriter indexWriter, TextWriter mapWriter, int firstIndex)
        {
            int fileIndex = firstIndex;
            foreach (TLArtifact artifact in artifacts.Values)
            {
                string fileName = fileIndex.ToString(_invariant);
                using (TextWriter tw = new StreamWriter(Path.Combine(corpusDir.FullName, fileName)))
                {
                    tw.Write(artifact.Text);
                }
                indexWriter.WriteLine(fileName);
                mapWriter.WriteLine(artifact.Id);
                fileIndex++;
            }
            return fileIndex;
        }
    }
}