18 using RPlugin.Exceptions;
20 using System.Collections.Generic;
23 using System.Reflection;
24 using TraceLab.Components.DevelopmentKit.IO;
25 using TraceLab.Components.RPlugin.Properties;
26 using TraceLab.Components.Types.Tracers.InformationRetrieval;
27 using TraceLabSDK.Types;
29 namespace TraceLab.Components.DevelopmentKit.Tracers.InformationRetrieval
// R script used by this component: the Settings.Default.Resources value with "LSA.R" appended.
// (presumably Resources is a path/namespace prefix for embedded scripts — confirm in Settings)
36 private readonly
string _baseScript = Settings.Default.Resources +
"LSA.R";
// R packages returned by the RequiredPackages property; only "lsa" is listed.
37 private readonly
string[] _requiredPackages =
new string[] {
"lsa" };
// Source and target artifact collections; both are written out by SaveArtifactsToCache
// and their counts bound the allowed Dimensions value in CheckParameters.
39 private TLArtifactsCollection _source;
40 private TLArtifactsCollection _target;
// Results file: passed to the script as an argument and later opened with a StreamReader.
// Its assignment is not in the visible lines.
42 private string _outputFile;
// Set from sFile.Name / tFile.Name / mFile.Name inside SaveArtifactsToCache.
// _SourceFile and _TargetFile are passed to the script as arguments; _mapFile is not
// read anywhere in the visible lines.
43 private string _SourceFile;
44 private string _TargetFile;
45 private string _mapFile;
// Override exposing the script identifier; the getter body is not visible in this excerpt
// (presumably returns _baseScript — confirm).
50 public override string BaseScript
// Override exposing the R packages this script needs; returns the _requiredPackages array.
61 public override string[] RequiredPackages
65 return _requiredPackages;
// Constructor: takes the source and target artifact collections and an LSAConfig, and chains
// to the parameterless base constructor. The body is not visible in this excerpt
// (presumably stores the arguments in _source, _target and _config — confirm).
75 public LSAScript(TLArtifactsCollection source, TLArtifactsCollection target,
LSAConfig config) : base()
// Writes both collections to disk under the name "LSA.corpus". This call must come before the
// argument list is filled, because SaveArtifactsToCache is what assigns _SourceFile and _TargetFile.
// NOTE(review): the enclosing method header is not visible in this excerpt.
90 DirectoryInfo corpusDir = SaveArtifactsToCache(_source, _target,
"LSA.corpus");
// Arguments for the R script, in this order: corpus directory path, source index file,
// target index file, output file, number of dimensions.
// (presumably the order must match what LSA.R reads — confirm against the script)
91 _arguments =
new List<object>();
92 _arguments.Add(corpusDir.FullName);
93 _arguments.Add(_SourceFile);
94 _arguments.Add(_TargetFile);
95 _arguments.Add(_outputFile);
96 _arguments.Add(_config.Dimensions);
// Reads the matrix written to _outputFile and converts it into a TLSimilarityMatrix.
// NOTE(review): the enclosing method header and the declarations of `line` and `ids` are not
// visible in this excerpt; `ids` is presumably the artifact-id list loaded from the map file — confirm.
// NOTE(review): no using/try-finally is visible around the reader; if parsing throws before
// Close() the file handle stays open until finalization.
108 TextReader resultsMatrix =
new StreamReader(_outputFile);
109 TLSimilarityMatrix matrix =
new TLSimilarityMatrix();
// Header row: whitespace-separated 1-based indices into ids, one per matrix column.
// NOTE(review): ReadLine() returns null for an empty file, which would throw here.
110 string[] sources = resultsMatrix.ReadLine().Split();
112 while ((line = resultsMatrix.ReadLine()) != null)
// Blank-line check; the action taken is not visible here (presumably skips the line — confirm).
114 if (String.IsNullOrWhiteSpace(line))
// Data row: first token is the 1-based index of this row's artifact in ids,
// followed by one value per header column.
117 string[] entries = line.Split();
118 string entry = ids[Convert.ToInt32(entries[0]) - 1];
119 for (
int i = 0; i < sources.Length; i++)
// Link from the column's artifact to the row's artifact; entries is offset by one because
// entries[0] holds the row index.
// NOTE(review): Convert.ToDouble(string) parses with the current culture; if the R output uses
// '.' as decimal separator this misparses on comma-decimal locales — consider InvariantCulture.
121 matrix.AddLink(ids[Convert.ToInt32(sources[i]) - 1], entry, Convert.ToDouble(entries[i + 1]));
124 resultsMatrix.Close();
// Validates _config.Dimensions against the input size.
// Throws RDataException when Dimensions is below 1 or exceeds the total number of
// documents (_source.Count + _target.Count).
128 private void CheckParameters()
// Lower bound: at least one dimension is required.
130 if (_config.Dimensions < 1)
132 throw new RDataException(
"Dimensions (" + _config.Dimensions +
") cannot be less than 1.");
// Upper bound: no more dimensions than source and target documents combined.
134 if (_config.Dimensions > _source.Count + _target.Count)
136 throw new RDataException(
"Dimensions (" + _config.Dimensions +
") cannot be greater than the number of documents (" + (_source.Count + _target.Count) +
").");
// Writes every source and target artifact to disk and records three index files:
// a source index, a target index, and a map of artifact ids. Returns a DirectoryInfo
// (the return statement is not visible in this excerpt).
// Side effects: assigns _SourceFile, _TargetFile and _mapFile.
// NOTE(review): the declarations of sFile, tFile, mFile, infoDir and fileIndex, and any use of
// the `name` parameter, fall in lines not visible here.
140 private DirectoryInfo SaveArtifactsToCache(TLArtifactsCollection source, TLArtifactsCollection target,
string name)
// Index of the files that hold source artifacts.
144 TextWriter sourceWriter =
new StreamWriter(sFile);
145 _SourceFile = sFile.Name;
// Index of the files that hold target artifacts.
147 TextWriter targetWriter =
new StreamWriter(tFile);
148 _TargetFile = tFile.Name;
// Artifact ids, one per line, in the order artifacts are written (sources first, then targets).
150 TextWriter mapWriter =
new StreamWriter(mFile);
151 _mapFile = mFile.Name;
// Source pass: each artifact's text goes into its own file named after fileIndex inside infoDir;
// the index is appended to the source index and the artifact id to the map.
// (presumably fileIndex is incremented per artifact in lines not shown — confirm)
155 foreach (TLArtifact artifact
in source.Values)
// NOTE(review): no Flush/Close of tw is visible in this excerpt — confirm it is disposed.
157 TextWriter tw =
new StreamWriter(Path.Combine(infoDir.FullName, fileIndex.ToString()));
158 tw.Write(artifact.Text);
161 sourceWriter.WriteLine(fileIndex);
162 mapWriter.WriteLine(artifact.Id);
165 sourceWriter.Flush();
166 sourceWriter.Close();
// Target pass: same layout as the source pass, recorded in the target index instead.
168 foreach (TLArtifact artifact
in target.Values)
// NOTE(review): no Flush/Close of tw is visible in this excerpt — confirm it is disposed.
170 TextWriter tw =
new StreamWriter(Path.Combine(infoDir.FullName, fileIndex.ToString()));
171 tw.Write(artifact.Text);
174 targetWriter.WriteLine(fileIndex);
175 mapWriter.WriteLine(artifact.Id);
178 targetWriter.Flush();
// NOTE(review): mapWriter's Flush/Close is not visible before the end of this excerpt — confirm.
179 targetWriter.Close();