TraceLab Component Library
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Properties
RTMScript.cs
Go to the documentation of this file.
1 // TraceLab Component Library
2 // Copyright © 2012-2013 SEMERU
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 using RPlugin.Core;
18 using RPlugin.Exceptions;
19 using System;
20 using System.Collections.Generic;
21 using System.IO;
22 using System.Reflection;
23 using TraceLab.Components.DevelopmentKit.IO;
24 using TraceLab.Components.RPlugin.Properties;
25 using TraceLab.Components.Types.Tracers.InformationRetrieval;
26 using TraceLabSDK.Types;
27 
28 namespace TraceLab.Components.DevelopmentKit.Tracers.InformationRetrieval
29 {
/// <summary>
/// R script wrapper that runs the bundled <c>RTM.R</c> script over an
/// <see cref="LDACorpus"/> and imports the similarities it writes into a
/// <see cref="TLSimilarityMatrix"/>.
/// </summary>
public class RTMScript : RScript
{
    // Location of the R script: the configured resources prefix followed by "RTM.R".
    private readonly string _baseScript = Settings.Default.Resources + "RTM.R";

    // R packages reported through RequiredPackages.
    private readonly string[] _requiredPackages = new string[] { "lda" };

    private readonly LDACorpus _corpus;
    private readonly RTMConfig _config;

    // Both are assigned by PreCompute() and read back by ImportResults().
    private LDACorpusInfo _info;
    private string _outputFile;

    /// <summary>
    /// Gets the script location (resources prefix + "RTM.R").
    /// </summary>
    public override string BaseScript
    {
        get
        {
            return _baseScript;
        }
    }

    /// <summary>
    /// Gets the names of the R packages this script requires ("lda").
    /// </summary>
    public override string[] RequiredPackages
    {
        get
        {
            return _requiredPackages;
        }
    }

    /// <summary>
    /// Creates the script from two artifact collections; each collection is
    /// converted to a <see cref="TermDocumentMatrix"/> first.
    /// </summary>
    /// <param name="source">Source artifacts.</param>
    /// <param name="target">Target artifacts.</param>
    /// <param name="config">RTM parameters handed to the R script.</param>
    public RTMScript(TLArtifactsCollection source, TLArtifactsCollection target, RTMConfig config) : base()
    {
        _corpus = new LDACorpus("RTM", new TermDocumentMatrix(source), new TermDocumentMatrix(target));
        _config = config;
    }

    /// <summary>
    /// Creates the script from already-built source and target matrices.
    /// </summary>
    /// <param name="source">Source term-by-document matrix.</param>
    /// <param name="target">Target term-by-document matrix.</param>
    /// <param name="config">RTM parameters handed to the R script.</param>
    public RTMScript(TermDocumentMatrix source, TermDocumentMatrix target, RTMConfig config) : base()
    {
        _corpus = new LDACorpus("RTM", source, target);
        _config = config;
    }

    /// <summary>
    /// Creates the script from a single matrix plus the IDs identifying its
    /// source and target documents.
    /// </summary>
    /// <param name="matrix">Term-by-document matrix.</param>
    /// <param name="sourceIDs">IDs of the source documents.</param>
    /// <param name="targetIDs">IDs of the target documents.</param>
    /// <param name="config">RTM parameters handed to the R script.</param>
    public RTMScript(TermDocumentMatrix matrix, IEnumerable<string> sourceIDs, IEnumerable<string> targetIDs, RTMConfig config) : base()
    {
        _corpus = new LDACorpus("RTM", matrix, sourceIDs, targetIDs);
        _config = config;
    }

    /// <summary>
    /// Registers the R script, saves the corpus, reserves the output cache
    /// file and builds the argument list for the script.
    /// </summary>
    public override void PreCompute()
    {
        RUtil.RegisterScript(Assembly.GetExecutingAssembly(), _baseScript);
        _info = _corpus.Save();
        _outputFile = RUtil.ReserveCacheFile("RTM.out");

        // The order below is the order in which the arguments are handed over;
        // NOTE(review): presumably it must match the positional arguments RTM.R reads - confirm.
        _arguments = new List<object>
        {
            _info.Corpus,
            _info.Vocab,
            _info.Edges,
            _info.Links,
            _outputFile,
            _config.NumTopics,
            _config.NumIterations,
            _config.Alpha,
            _config.Eta,
            _config.RTMBeta,
            _config.PredictionBeta,
            _config.Seed
        };
    }

    /// <summary>
    /// Reads the output file reserved in <see cref="PreCompute"/> and pairs
    /// each similarity value with the corresponding line of the saved edges file.
    /// </summary>
    /// <param name="result">Result of the script run; not used by this implementation.</param>
    /// <returns>A <see cref="TLSimilarityMatrix"/> holding one link per edge.</returns>
    /// <exception cref="RDataException">
    /// The output is shorter than its two-character prefix, or the number of
    /// similarity values differs from the number of edges.
    /// </exception>
    public override object ImportResults(RScriptResult result)
    {
        // File.ReadAllText releases the file handle even when reading fails.
        string rawdata = File.ReadAllText(_outputFile);

        // Remove(0, 2) below needs at least two characters; report short output
        // as a data error instead of an ArgumentOutOfRangeException.
        if (rawdata.Length < 2)
        {
            throw new RDataException("Results file is empty or truncated: " + _outputFile);
        }

        // Drop the first two characters and every ')' so only the comma-separated
        // values remain. NOTE(review): looks like the output is an R vector
        // literal of the form "c(...)" - confirm against RTM.R.
        string[] sims = rawdata.Remove(0, 2)
                               .Replace(")", String.Empty)
                               .Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
        string[] edges = Generics.ImportStrings(_info.Edges);
        if (sims.Length != edges.Length)
        {
            throw new RDataException("Results are incorrect size: " + sims.Length + " vs " + edges.Length);
        }

        // Both files are machine-written, so parse with the invariant culture;
        // the current culture could treat '.' as a group separator.
        IFormatProvider invariant = global::System.Globalization.CultureInfo.InvariantCulture;

        TLSimilarityMatrix matrix = new TLSimilarityMatrix();
        for (int i = 0; i < sims.Length; i++)
        {
            // Each edge line carries two whitespace-separated indices into the corpus map.
            string[] split = edges[i].Split();
            int first = Convert.ToInt32(split[0], invariant);
            int second = Convert.ToInt32(split[1], invariant);
            matrix.AddLink(_corpus.Map[first], _corpus.Map[second], Convert.ToDouble(sims[i], invariant));
        }
        return matrix;
    }
}
164 }