18 using RPlugin.Exceptions;
20 using System.Collections.Generic;
22 using System.Reflection;
23 using TraceLab.Components.RPlugin.Properties;
24 using TraceLab.Components.Types.Tracers.InformationRetrieval;
25 using TraceLabSDK.Types;
27 namespace TraceLab.Components.DevelopmentKit.Tracers.InformationRetrieval
// Script path: the Resources setting value with "LDA.R" appended.
34 private readonly
string _baseScript = Settings.Default.Resources +
"LDA.R";
// Value returned by the RequiredPackages property; holds the single entry "lda".
// NOTE(review): presumably the R package(s) the script needs installed - confirm against the base class.
35 private readonly
string[] _requiredPackages =
new string[] {
"lda" };
// File path that is added to the argument list and later opened for reading to obtain the results.
40 private string _outputFile;
// Overrides the base class's BaseScript property.
// NOTE(review): presumably exposes _baseScript - confirm against the accessor body.
45 public override string BaseScript
// Overrides the base class's RequiredPackages property; returns the _requiredPackages array.
56 public override string[] RequiredPackages
60 return _requiredPackages;
// Constructs an LDAScript from a source artifacts collection, a target artifacts
// collection and an LDAConfig; chains to the parameterless base constructor.
70 public LDAScript(TLArtifactsCollection source, TLArtifactsCollection target,
LDAConfig config) : base()
// Build the argument list from scratch. Entries are appended in a fixed order:
// corpus, vocab, edges, output file, then the configuration values.
// NOTE(review): the order presumably has to match what LDA.R reads positionally - confirm against the script.
99 _arguments =
new List<object>();
// Input data taken from `info` (declared outside this excerpt).
100 _arguments.Add(info.
Corpus);
101 _arguments.Add(info.
Vocab);
102 _arguments.Add(info.
Edges);
// The same path is opened later to read the results.
103 _arguments.Add(_outputFile);
// Configuration values taken from _config.
104 _arguments.Add(_config.NumTopics);
105 _arguments.Add(_config.NumIterations);
106 _arguments.Add(_config.Alpha);
107 _arguments.Add(_config.Eta);
108 _arguments.Add(_config.PredictionBeta);
109 _arguments.Add(_config.Seed);
// Read the entire contents of _outputFile into memory.
// NOTE(review): confirm the reader is closed/disposed after this read.
119 TextReader rfile =
new StreamReader(_outputFile);
120 string rawdata = rfile.ReadToEnd();
122 TLSimilarityMatrix matrix =
new TLSimilarityMatrix();
// Drop the first two characters, strip every ")" and split on commas, discarding empty entries.
// NOTE(review): presumably the file holds a single "c(v1, v2, ...)"-style vector - confirm.
// Remove(0,2) throws if the file holds fewer than two characters.
123 string[] sims = rawdata.Remove(0,2).Replace(
")", String.Empty).Split(
new char[] {
','}, StringSplitOptions.RemoveEmptyEntries);
// tgt starts at the source count; the source count is subtracted again when indexing _target.DocMap.
125 int tgt = _source.DocMap.Count;
// Expect exactly one value per (source, target) pair; otherwise report the mismatch.
126 if (sims.Length != _source.DocMap.Count * _target.DocMap.Count)
128 throw new RDataException(
"Results are incorrect size: " + sims.Length +
" vs " + (_source.DocMap.Count * _target.DocMap.Count));
130 foreach (
string sim
in sims)
// Add a link between the current source entry and the current target entry, using the trimmed value as the score.
// NOTE(review): Convert.ToDouble(string) parses with the current culture - confirm this is safe for
// the decimal separator written by R on non-English locales.
132 matrix.AddLink(_source.DocMap[src], _target.DocMap[tgt - _source.DocMap.Count], Convert.ToDouble(sim.Trim()));
// Once tgt reaches source count + target count, wrap it back to the first target.
134 if (tgt == _source.DocMap.Count + _target.DocMap.Count)
136 tgt = _source.DocMap.Count;