TraceLab Component Library
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Properties
PorterStemmerUtils.cs
Go to the documentation of this file.
1 // TraceLab Component Library
2 // Copyright © 2012-2013 SEMERU
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 using System;
18 using System.Text;
19 using TraceLabSDK.Types;
20 
21 namespace TraceLab.Components.DevelopmentKit.Preprocessors.Stemmers.Porter
22 {
/// <summary>
/// Static helpers that apply <see cref="PorterStemmer"/> to plain text and to
/// whole collections of artifacts.
/// </summary>
public static class PorterStemmerUtils
{
    /// <summary>
    /// Builds a new collection containing a stemmed counterpart of every artifact
    /// in <paramref name="listOfArtifacts"/>.
    /// </summary>
    /// <param name="listOfArtifacts">
    /// Source artifacts. Only the Id and Text of each artifact are read here.
    /// </param>
    /// <returns>
    /// A new collection with one artifact per source artifact, carrying the source
    /// Id and the result of <see cref="ProcessText"/> applied to the source Text.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="listOfArtifacts"/> is null.
    /// </exception>
    public static TLArtifactsCollection ProcessArtifacts(TLArtifactsCollection listOfArtifacts)
    {
        // Fail with a named argument instead of an anonymous NullReferenceException.
        if (listOfArtifacts == null)
        {
            throw new ArgumentNullException("listOfArtifacts");
        }

        TLArtifactsCollection processed = new TLArtifactsCollection();
        foreach (TLArtifact artifact in listOfArtifacts.Values)
        {
            TLArtifact processedArtifact = new TLArtifact(artifact.Id, String.Empty);
            processedArtifact.Text = ProcessText(artifact.Text);
            processed.Add(processedArtifact);
        }
        return processed;
    }

    /// <summary>
    /// Stems every space-separated token of <paramref name="textToProcess"/> and
    /// joins the stemmed tokens back together with single spaces.
    /// </summary>
    /// <param name="textToProcess">
    /// Text to stem. Tokens are split on the space character only; other
    /// whitespace stays inside the token handed to the stemmer.
    /// </param>
    /// <returns>
    /// The stemmed tokens separated by spaces, with leading and trailing
    /// whitespace trimmed from the final string. Returns an empty string when
    /// <paramref name="textToProcess"/> is null.
    /// </returns>
    public static string ProcessText(string textToProcess)
    {
        // A null text (e.g. an artifact without content) has nothing to stem;
        // returning empty keeps a single such artifact from aborting a batch.
        if (textToProcess == null)
        {
            return string.Empty;
        }

        string[] tokens = textToProcess.Split(' ');
        PorterStemmer porterStemmer = new PorterStemmer();
        StringBuilder builder = new StringBuilder();
        foreach (string token in tokens)
        {
            // Each stemmed token is followed by a separator; the surplus
            // trailing separator is removed by the Trim() below.
            builder.Append(porterStemmer.stemTerm(token)).Append(' ');
        }
        return builder.ToString().Trim();
    }
}
67 }