18 using System.Collections.Generic;
21 using System.Text.RegularExpressions;
22 using TraceLabSDK.Types;
24 namespace TraceLab.Components.DevelopmentKit.Preprocessors
/// <summary>
/// Creates a new artifacts collection in which each artifact carries the
/// identifier of a source artifact and the cleaned form of its text.
/// </summary>
/// <param name="listOfArtifacts">Collection whose <c>Values</c> are enumerated and cleaned.</param>
/// <param name="convertToLowercase">Passed unchanged to <c>ProcessText</c> for every artifact.</param>
/// <returns>The newly created collection; <c>Add</c> is called on it once per enumerated artifact.</returns>
public static TLArtifactsCollection ProcessArtifacts(TLArtifactsCollection listOfArtifacts, bool convertToLowercase)
{
    var cleaned = new TLArtifactsCollection();

    foreach (TLArtifact source in listOfArtifacts.Values)
    {
        // The artifact is constructed with empty text first; the cleaned text
        // is then assigned through the Text property.
        var copy = new TLArtifact(source.Id, String.Empty)
        {
            Text = ProcessText(source.Text, convertToLowercase)
        };

        cleaned.Add(copy);
    }

    return cleaned;
}
/// <summary>
/// Cleans a piece of text: every character that is not an ASCII letter, an
/// ASCII digit or a space is replaced by a space, the remaining words are
/// re-joined with single spaces (no leading or trailing space), and the
/// result is optionally lower-cased.
/// </summary>
/// <param name="text">Text to clean. <c>null</c> is treated as empty text.</param>
/// <param name="convertToLowercase">When <c>true</c>, the cleaned text is converted to lower case.</param>
/// <returns>The cleaned text, or an empty string when no letters or digits remain.</returns>
public static string ProcessText(string text, bool convertToLowercase)
{
    // Regex.Replace throws on null input; an artifact without text simply
    // cleans to empty text.
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    string cleanText = Regex.Replace(text, "[^A-Za-z0-9 ]", " ");

    // After the replacement above only letters, digits and spaces remain, so
    // a space is the only separator that can still occur.
    string[] parts = cleanText.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    string result = string.Join(" ", parts);

    // The text is pure ASCII at this point; use the invariant culture so the
    // result does not depend on the current thread culture (e.g. Turkish
    // lower-cases 'I' to a dotless 'ı').
    return convertToLowercase ? result.ToLowerInvariant() : result;
}