TraceLab Component Library
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Properties
StopwordsComponent.cs
Go to the documentation of this file.
1 // TraceLab - Software Traceability Instrument to Facilitate and Empower Traceability Research
2 // Copyright © 2012-2013 CoEST - National Science Foundation MRI-R2 Grant # CNS: 0959924
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 using TraceLab.Components.DevelopmentKit.Preprocessors;
18 using TraceLabSDK;
19 using TraceLabSDK.Types;
20 
21 namespace TraceLab.Components.Library.Preprocessors
22 {
/// <summary>
/// TraceLab component that runs the artifacts stored in the Workspace through
/// <c>StopwordsRemover.ProcessArtifacts</c> and writes the result back.
/// </summary>
/// <remarks>
/// The output is stored under the same Workspace key ("listOfArtifacts") that the
/// input is read from, so the stored collection replaces the loaded one.
/// </remarks>
[Component(
    Name = "Stopwords Remover",
    Description = "Removes common stop words, such as 'a', 'the', 'will', etc. It uses a list of stopwords previously imported to the Workspace.",
    Author = "SAREC",
    Version = "1.0.0.0",
    ConfigurationType = typeof(StopwordsComponentConfig))]
[IOSpec(IOSpecType.Input, "listOfArtifacts", typeof(TLArtifactsCollection))]
[IOSpec(IOSpecType.Input, "Stopwords", typeof(TLStopwords))]
[IOSpec(IOSpecType.Output, "listOfArtifacts", typeof(TLArtifactsCollection))]
[Tag("Preprocessors")]
public class StopWordsComponent : BaseComponent
{
    // Settings object that is also published through the base class's Configuration property.
    private StopwordsComponentConfig _config;

    /// <summary>
    /// Creates the component with a fresh configuration object and registers it
    /// as the component's configuration.
    /// </summary>
    /// <param name="log">Logger forwarded to the base component.</param>
    public StopWordsComponent(ComponentLogger log)
        : base(log)
    {
        // The field is assigned first, then the same instance is handed to Configuration.
        Configuration = _config = new StopwordsComponentConfig();
    }

    /// <summary>
    /// Loads "listOfArtifacts" and "Stopwords" from the Workspace, passes them to
    /// <c>StopwordsRemover.ProcessArtifacts</c> together with the configured
    /// <c>MinWordLength</c> and <c>RemoveNumbers</c> values, and stores the returned
    /// collection under "listOfArtifacts".
    /// </summary>
    public override void Compute()
    {
        var artifacts = (TLArtifactsCollection)Workspace.Load("listOfArtifacts");
        var stopwordList = (TLStopwords)Workspace.Load("Stopwords");

        TLArtifactsCollection processed = StopwordsRemover.ProcessArtifacts(
            artifacts,
            stopwordList,
            _config.MinWordLength,
            _config.RemoveNumbers);

        Workspace.Store("listOfArtifacts", processed);
    }
}
51 
/// <summary>
/// Configuration values for the stopwords component. Both values are passed
/// straight through to <c>StopwordsRemover.ProcessArtifacts</c>.
/// </summary>
// NOTE(review): the class declaration line was missing from this listing; the name is
// taken from the typeof(...)/new usages in the component class, and the public
// modifier is assumed -- confirm against the original source.
public class StopwordsComponentConfig
{
    /// <summary>
    /// Minimum word length handed to the stopwords remover.
    /// Presumably words shorter than this are dropped -- TODO confirm against StopwordsRemover.
    /// </summary>
    public int MinWordLength { get; set; }

    /// <summary>
    /// Flag handed to the stopwords remover.
    /// Presumably enables removal of numeric tokens -- TODO confirm against StopwordsRemover.
    /// </summary>
    public bool RemoveNumbers { get; set; }
}
57 }