TraceLab Component Library
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Properties
SemeruSplitter.cs
Go to the documentation of this file.
1 // TraceLab Component Library
2 // Copyright © 2012-2013 SEMERU
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 using System;
18 using System.Collections.Generic;
19 using System.Linq;
20 using System.Text;
21 using TraceLabSDK.Types;
22 
23 namespace TraceLab.Components.DevelopmentKit.Preprocessors.Splitters
24 {
/// <summary>
/// Splits compound identifiers (underscore-separated, camelCase, PascalCase and
/// acronym-prefixed names) into lower-cased, space-separated terms.
/// </summary>
public static class SemeruSplitter
{
    /// <summary>
    /// Builds a new collection in which every artifact keeps its id and has its
    /// text replaced by the result of <see cref="ProcessText"/>.
    /// </summary>
    /// <param name="listOfArtifacts">Artifacts to process. Only their Id and Text are read.</param>
    /// <param name="keepCompoundIdentifier">Passed through to <see cref="ProcessText"/>.</param>
    /// <returns>A new collection holding one processed artifact per input artifact.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="listOfArtifacts"/> is null.</exception>
    public static TLArtifactsCollection ProcessArtifacts(TLArtifactsCollection listOfArtifacts, bool keepCompoundIdentifier)
    {
        if (listOfArtifacts == null)
        {
            throw new ArgumentNullException("listOfArtifacts");
        }

        TLArtifactsCollection processed = new TLArtifactsCollection();
        foreach (TLArtifact artifact in listOfArtifacts.Values)
        {
            TLArtifact processedArtifact = new TLArtifact(artifact.Id, String.Empty);
            processedArtifact.Text = ProcessText(artifact.Text, keepCompoundIdentifier);
            processed.Add(processedArtifact);
        }
        return processed;
    }

    /// <summary>
    /// Splits every whitespace-delimited word of <paramref name="originalBuffer"/> into
    /// its component terms and lower-cases the result.
    /// </summary>
    /// <remarks>
    /// A word is split at underscores, at a lower-to-upper transition ("fooBar" -> "foo bar")
    /// and before the last capital of an upper-case run that is followed by a lower-case
    /// letter ("XMLParser" -> "xml parser"). A leading capital ("Hello") is not a boundary.
    /// Every emitted word is followed by a single space, so non-empty output ends with a space.
    /// </remarks>
    /// <param name="originalBuffer">Text to process. Null or empty yields an empty string.</param>
    /// <param name="keepCompoundIdentifier">
    /// When true, each word that was actually split is additionally followed by its
    /// original (unsplit) form, lower-cased.
    /// </param>
    /// <returns>The processed text.</returns>
    public static string ProcessText(string originalBuffer, bool keepCompoundIdentifier)
    {
        // A null text (e.g. an artifact without content) is treated like empty text
        // instead of failing with a NullReferenceException.
        if (String.IsNullOrEmpty(originalBuffer))
        {
            return String.Empty;
        }

        StringBuilder newBuffer = new StringBuilder();

        // Split() without arguments breaks on any whitespace; consecutive
        // separators produce empty entries, which are skipped below.
        foreach (string word in originalBuffer.Split())
        {
            if (word.Length == 0)
            {
                continue;
            }

            bool isCompoundIdentifier = word.IndexOf('_') >= 0;
            StringBuilder newWord = new StringBuilder(word);
            if (isCompoundIdentifier)
            {
                newWord.Replace('_', ' ');
            }

            // Walk backwards so that inserting a separator never shifts the
            // positions that are still to be examined.
            for (int i = newWord.Length - 1; i > 0; i--)
            {
                char current = newWord[i];
                char previous = newWord[i - 1];

                if (Char.IsUpper(current) && Char.IsLower(previous))
                {
                    // lower -> Upper transition: "fooBar" => "foo Bar"
                    newWord.Insert(i, ' ');
                    isCompoundIdentifier = true;
                }
                else if (Char.IsLower(current) && Char.IsUpper(previous)
                         && i > 1 && newWord[i - 2] != ' ')
                {
                    // End of an upper-case run: "XMLParser" => "XML Parser".
                    // Requiring a non-separator character before the capital keeps a
                    // plain capitalised word ("Hello") or a capital that already
                    // follows a separator ("foo_Bar") from being split again.
                    newWord.Insert(i - 1, ' ');
                    isCompoundIdentifier = true;
                }
            }

            // Identifiers are not linguistic text, so lower-case them independently
            // of the current culture.
            newBuffer.Append(newWord.ToString().ToLowerInvariant());
            newBuffer.Append(' ');

            if (keepCompoundIdentifier && isCompoundIdentifier)
            {
                newBuffer.Append(word.ToLowerInvariant());
                newBuffer.Append(' ');
            }
        }

        return newBuffer.ToString();
    }
}
117 }