18 using System.Collections.Generic;
21 using TraceLabSDK.Types;
22 using System.Text.RegularExpressions;
24 namespace TraceLab.Components.DevelopmentKit.Preprocessors.Splitters
/// <summary>
/// Builds a new artifacts collection in which every artifact keeps its
/// original identifier but carries the camel-case-split form of its text.
/// The input collection and its artifacts are not modified.
/// </summary>
/// <param name="listOfArtifacts">Artifacts whose text is to be split.</param>
/// <param name="convertToLowercase">
/// Forwarded to <see cref="ProcessText"/>; when true the split text is lowercased.
/// </param>
/// <returns>A freshly created collection holding one processed artifact per input artifact.</returns>
public static TLArtifactsCollection ProcessArtifacts(TLArtifactsCollection listOfArtifacts, bool convertToLowercase)
{
    TLArtifactsCollection output = new TLArtifactsCollection();

    foreach (TLArtifact source in listOfArtifacts.Values)
    {
        // Construct with empty text first, then assign the processed text,
        // in the same order as a separate property assignment would.
        TLArtifact converted = new TLArtifact(source.Id, String.Empty)
        {
            Text = ProcessText(source.Text, convertToLowercase)
        };

        output.Add(converted);
    }

    return output;
}
/// <summary>
/// Whitespace characters that delimit raw tokens in the input text.
/// </summary>
private static readonly char[] TokenSeparators = { ' ', '\n', '\t', '\r', '\f', '\v' };

/// <summary>
/// Zero-width pattern matching the position in front of every ASCII uppercase
/// letter, except when that letter is the first character of the token.
/// Built once and reused instead of being constructed on every call.
/// </summary>
private static readonly Regex CamelCaseBoundary =
    new Regex(@"(?<!^)(?=[A-Z])", RegexOptions.Compiled);

/// <summary>
/// Splits camel-case tokens of the given text into separate words.
/// The text is first tokenized on whitespace; each token is then broken in
/// front of every ASCII uppercase letter that is not its first character,
/// and all resulting words are joined with single spaces.
/// Every uppercase letter starts a new word, so a run of capitals such as
/// "HTTPRequest" becomes "H T T P Request".
/// </summary>
/// <param name="text">Text to process. Null or empty yields an empty string.</param>
/// <param name="convertToLowercase">
/// When true the result is lowercased using the invariant culture, so the
/// output does not depend on the locale of the machine running the component.
/// </param>
/// <returns>The space-separated words, trimmed of leading and trailing whitespace.</returns>
public static string ProcessText(string text, bool convertToLowercase)
{
    // Guard: treat missing text as "nothing to split" rather than failing
    // with a NullReferenceException.
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    StringBuilder builder = new StringBuilder(text.Length + 16);

    foreach (string part in text.Split(TokenSeparators, StringSplitOptions.RemoveEmptyEntries))
    {
        foreach (string word in CamelCaseBoundary.Split(part))
        {
            builder.Append(word).Append(' ');
        }
    }

    // Trim removes the trailing separator (and any other leading/trailing
    // whitespace that was not one of the token separators).
    string result = builder.ToString().Trim();

    return convertToLowercase ? result.ToLowerInvariant() : result;
}