TraceLab Component Library
 All Classes Namespaces Files Functions Variables Enumerations Enumerator Properties
CamelCaseSplitter.cs
Go to the documentation of this file.
1 // TraceLab Component Library
2 // Copyright © 2012-2013 SEMERU
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
16 
17 using System;
18 using System.Collections.Generic;
19 using System.Linq;
20 using System.Text;
21 using TraceLabSDK.Types;
22 using System.Text.RegularExpressions;
23 
24 namespace TraceLab.Components.DevelopmentKit.Preprocessors.Splitters
25 {
29  public static class CamelCaseSplitter
30  {
/// <summary>
/// Builds a new artifacts collection in which every artifact's text has been
/// run through <see cref="ProcessText"/>.
/// </summary>
/// <param name="listOfArtifacts">Collection whose <c>Values</c> are the artifacts to process.</param>
/// <param name="convertToLowercase">Forwarded to <see cref="ProcessText"/> for every artifact.</param>
/// <returns>A newly created collection holding one new artifact per input artifact, with the same Id.</returns>
public static TLArtifactsCollection ProcessArtifacts(TLArtifactsCollection listOfArtifacts, bool convertToLowercase)
{
    TLArtifactsCollection splitArtifacts = new TLArtifactsCollection();

    foreach (TLArtifact source in listOfArtifacts.Values)
    {
        // A fresh artifact is created with empty text and then given the split text;
        // only the Id and Text of the source artifact are read.
        splitArtifacts.Add(new TLArtifact(source.Id, String.Empty)
        {
            Text = ProcessText(source.Text, convertToLowercase)
        });
    }

    return splitArtifacts;
}
48 
// Zero-width split point in front of every ASCII uppercase letter, except at the
// very start of the token. Built once and shared: Regex instances are immutable
// and thread safe, so there is no need to re-create one per call.
private static readonly Regex UppercaseBoundary = new Regex(@"(?<!^)(?=[A-Z])");

// Characters treated as token separators when collapsing runs of white space.
private static readonly char[] WhitespaceSeparators = { ' ', '\n', '\t', '\r', '\f', '\v' };

/// <summary>
/// Splits camel-case tokens in <paramref name="text"/> into separate words.
/// The text is first broken on white space (runs of white space collapse to a
/// single separator), then every token is split in front of each ASCII uppercase
/// letter that is not its first character. The resulting words are joined with
/// single spaces.
/// </summary>
/// <remarks>
/// Every uppercase letter starts a new word, so an acronym such as "XMLParser"
/// becomes "X M L Parser".
/// </remarks>
/// <param name="text">Text to split. <c>null</c> or empty yields an empty string.</param>
/// <param name="convertToLowercase">
/// When <c>true</c>, the result is lower-cased using the invariant culture so the
/// output does not depend on the current thread culture.
/// </param>
/// <returns>The space-separated words, without leading or trailing white space.</returns>
public static string ProcessText(string text, bool convertToLowercase)
{
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Splitting on a fixed character set is reportedly faster than a
    // Regex.Replace over "[\s]+" for significantly larger inputs.
    string[] parts = text.Split(WhitespaceSeparators, StringSplitOptions.RemoveEmptyEntries);

    StringBuilder builder = new StringBuilder(text.Length + 16);
    foreach (string part in parts)
    {
        foreach (string word in UppercaseBoundary.Split(part))
        {
            builder.Append(word).Append(' ');
        }
    }

    // Trim drops the trailing separator (and any white space at either end that
    // is not in the separator set).
    string result = builder.ToString().Trim();

    // Invariant casing: culture-sensitive ToLower() would, for example, map 'I'
    // to a dotless 'ı' under Turkish cultures and corrupt identifier terms.
    return convertToLowercase ? result.ToLowerInvariant() : result;
}
81  }
82 }