TraceLab Component Library
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Properties
SnowballStemmer.cs
Go to the documentation of this file.
1 // TraceLab Component Library
2 // Copyright © 2012-2013 SEMERU
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 using System;
18 using System.Text;
19 using TraceLabSDK.Types;
20 
21 namespace TraceLab.Components.DevelopmentKit.Preprocessors.Stemmers.Snowball
22 {
26  public static class SnowballStemmer
27  {
/// <summary>
/// Stems the text of every artifact in <paramref name="artifacts"/> and returns the
/// results in a newly created collection.
/// </summary>
/// <param name="artifacts">
/// Collection whose values are processed. Each artifact's <c>Id</c> and <c>Text</c> are read;
/// a new artifact is created for every input artifact.
/// </param>
/// <param name="langauge">
/// Stemmer language, forwarded unchanged to <see cref="ProcessText"/>. The parameter name
/// is misspelled (sic) and kept as-is so callers using named arguments keep compiling.
/// </param>
/// <returns>
/// A new collection holding one artifact per input artifact, with the same id and the
/// text returned by <see cref="ProcessText"/>.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="artifacts"/> is <c>null</c>.
/// </exception>
public static TLArtifactsCollection ProcessArtifacts(TLArtifactsCollection artifacts, SnowballStemmerEnum langauge)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // raised from the artifacts.Values access below.
    if (artifacts == null)
    {
        throw new ArgumentNullException("artifacts");
    }

    TLArtifactsCollection processed = new TLArtifactsCollection();
    foreach (TLArtifact artifact in artifacts.Values)
    {
        // Keep the source artifact's id; only the text is replaced by its stemmed form.
        TLArtifact processedArtifact = new TLArtifact(artifact.Id, String.Empty);
        processedArtifact.Text = ProcessText(artifact.Text, langauge);
        processed.Add(processedArtifact);
    }
    return processed;
}
45 
/// <summary>
/// Splits <paramref name="text"/> on single space characters, passes every token to
/// <c>stemmer.Stem</c>, and returns the stemmed tokens joined by single spaces with
/// leading and trailing whitespace trimmed.
/// </summary>
/// <param name="text">
/// Text to stem. It is dereferenced without a null check, so a <c>null</c> value fails
/// at the <c>Split</c> call.
/// </param>
/// <param name="language">
/// Stemmer language. NOTE(review): not referenced in the lines visible here; presumably
/// it selects the stemmer on the line missing from this listing - confirm against the
/// original file.
/// </param>
/// <returns>The space-separated stemmed tokens, trimmed.</returns>
52  public static string ProcessText(string text, SnowballStemmerEnum language)
53  {
54  StringBuilder builder = new StringBuilder();
55  string result = string.Empty;
56  string stemmedWord;
    // Only the space character is a delimiter, and empty entries are not removed:
    // consecutive spaces yield empty tokens that are also passed to Stem, and
    // tabs/newlines stay inside their tokens.
57  char[] delimiterChars = { ' ' };
58  string[] tokens = text.Split(delimiterChars);
    // NOTE(review): `stemmer` is not declared anywhere in the visible listing. The embedded
    // numbering jumps from 58 to 60, so its declaration is presumably on the omitted
    // line 59 - verify before changing this method.
60  foreach (string token in tokens)
61  {
62  stemmedWord = stemmer.Stem(token);
    // Every stemmed token is followed by one space; the final Trim() removes the last one.
63  builder.AppendFormat("{0} ", stemmedWord);
64  }
65  result = builder.ToString().Trim();
66  return result;
67  }
68  }
69 }