TraceLab Component Library
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Properties
POSExtractorComponent.cs
Go to the documentation of this file.
1 // TraceLab Component Library
2 // Copyright © 2012-2013 SEMERU
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 using System.Collections.Generic;
18 using System.ComponentModel;
19 using TraceLab.Components.DevelopmentKit.Preprocessors;
20 using TraceLabSDK;
21 using TraceLabSDK.Component.Config;
22 using TraceLabSDK.Types;
23 
24 namespace TraceLab.Components.Library.Preprocessors
25 {
/// <summary>
/// TraceLab component that runs part-of-speech extraction over every artifact
/// in the "ListOfArtifacts" workspace unit and stores the resulting collection
/// back into the workspace under the same name.
/// </summary>
[Component(Name = "Part-of-speech Extractor",
    Description = "Extracts terms from a TLArtifactsCollection based on the part of speech specified. Uses the Stanford NLP POS-tagger.",
    Author = "SEMERU; Evan Moritz",
    Version = "1.0.0.0",
    ConfigurationType = typeof(POSExtractorConfig))]
[IOSpec(IOSpecType.Input, "ListOfArtifacts", typeof(TLArtifactsCollection))]
[IOSpec(IOSpecType.Output, "ListOfArtifacts", typeof(TLArtifactsCollection))]
[Tag("Preprocessors")]
public class POSExtractorComponent : BaseComponent
{
    // Settings object handed to the base class as this component's Configuration.
    private readonly POSExtractorConfig _config;

    /// <summary>
    /// Creates the component with a fresh <see cref="POSExtractorConfig"/> and
    /// registers it as the component configuration.
    /// </summary>
    /// <param name="log">Logger forwarded to the base component.</param>
    public POSExtractorComponent(ComponentLogger log)
        : base(log)
    {
        _config = new POSExtractorConfig();
        Configuration = _config;
    }

    /// <summary>
    /// Loads "ListOfArtifacts", extracts the configured part of speech from each
    /// artifact using the configured model file, and stores the extracted
    /// collection back as "ListOfArtifacts". Progress is traced per artifact.
    /// </summary>
    /// <exception cref="ComponentException">
    /// Thrown when the "ListOfArtifacts" input loaded from the workspace is null.
    /// </exception>
    public override void Compute()
    {
        Logger.Trace("Starting POSExtractor. This may take awhile (especially the bidirectional models)....");

        TLArtifactsCollection artifacts = (TLArtifactsCollection)Workspace.Load("ListOfArtifacts");
        if (artifacts == null)
        {
            // Fail with a descriptive error instead of a NullReferenceException in the loop below.
            throw new ComponentException("Received null 'ListOfArtifacts' from the workspace.");
        }

        TLArtifactsCollection extracted = new TLArtifactsCollection();
        int total = artifacts.Count;
        int count = 1;

        // Artifacts are processed one at a time so that progress can be reported after each one.
        foreach (KeyValuePair<string, TLArtifact> artifactKVP in artifacts)
        {
            extracted.Add(artifactKVP.Key, POSTagger.ExtractArtifact(artifactKVP.Value, _config.POS, _config.ModelFile));
            Logger.Trace("Extracted " + count + "/" + total);
            count++;
        }

        Workspace.Store("ListOfArtifacts", extracted);
    }
}
61 
/// <summary>
/// Settings for <see cref="POSExtractorComponent"/>: which part of speech to
/// extract and which training model file to tag with.
/// </summary>
public class POSExtractorConfig
{
    /// <summary>
    /// The desired part of speech for extraction.
    /// </summary>
    [DisplayName("Part of Speech"), Description("The desired part of speech for extraction.")]
    public POSTagger.POSTaggerSpeechType POS { get; set; }

    /// <summary>
    /// The training model used to tag the parts of speech.
    /// </summary>
    [DisplayName("Training Model"), Description("The training model used to tag the parts of speech.")]
    public FilePath ModelFile { get; set; }
}
72 }