Author: rezan
Date: Wed Jul 29 18:04:04 2015
New Revision: 1693327
URL: http://svn.apache.org/r1693327
Log:
tokenization
Modified:
devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java
devicemap/trunk/clients/2.0/reference/src/Main.java
Modified: devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java
URL:
http://svn.apache.org/viewvc/devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java?rev=1693327&r1=1693326&r2=1693327&view=diff
==============================================================================
--- devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java (original)
+++ devicemap/trunk/clients/2.0/reference/src/DeviceMapClient.java Wed Jul 29 18:04:04 2015
@@ -33,7 +33,7 @@ public class DeviceMapClient {
private String domainVersion;
private List<Transformer> transformers;
- private List<String> tokenSeperators;
+ private List<byte[]> tokenSeperators;
private int ngramConcatSize;
private String defaultId;
@@ -43,7 +43,7 @@ public class DeviceMapClient {
domainVersion = null;
transformers = new ArrayList<Transformer>();
- tokenSeperators = new ArrayList<String>();
+ tokenSeperators = new ArrayList<byte[]>();
ngramConcatSize = 1;
defaultId = null;
@@ -92,13 +92,17 @@ public class DeviceMapClient {
if(get(inputParser, "tokenSeperators").isArray()) {
if(patch) {
- tokenSeperators = new ArrayList<String>();
+ tokenSeperators = new ArrayList<byte[]>();
}
for(Iterator<JsonNode> i =
inputParser.get("tokenSeperators").iterator(); i.hasNext();) {
JsonNode tokenSeperator = i.next();
- tokenSeperators.add(tokenSeperator.asText());
+ if(tokenSeperator.asText().isEmpty()) {
+ throw new Exception("Empty tokenSeperator not allowed");
+ }
+
+ tokenSeperators.add(tokenSeperator.asText().getBytes());
Main.log(" Found tokenSeperator: '" + tokenSeperator.asText() +
"'");
}
@@ -237,6 +241,46 @@ public class DeviceMapClient {
}
Main.log(" Transformed: '" + transformed + "'");
+
+ //tokenization using bytes
+ List<String> tokens = new ArrayList<String>();
+
+ byte[] source = transformed.getBytes();
+ int sourcePos = 0;
+
+ byte[] dest = new byte[source.length];
+ int destPos = 0;
+
+ source:
+ while(sourcePos < source.length) {
+ seperator:
+ for(byte[] seperator : tokenSeperators) {
+ int i;
+
+ for(i = 0; i < seperator.length; i++) {
+ if(source[sourcePos + i] != seperator[i]) {
+ continue seperator;
+ }
+ }
+
+ if(destPos > 0) {
+ tokens.add(new String(dest, 0, destPos));
+ destPos = 0;
+ }
+
+ sourcePos += i;
+
+ continue source;
+ }
+
+ dest[destPos++] = source[sourcePos++];
+ }
+
+ if(destPos > 0) {
+ tokens.add(new String(dest, 0, destPos));
+ }
+
+ Main.log(" Tokens: " + tokens);
return "";
}
Modified: devicemap/trunk/clients/2.0/reference/src/Main.java
URL:
http://svn.apache.org/viewvc/devicemap/trunk/clients/2.0/reference/src/Main.java?rev=1693327&r1=1693326&r2=1693327&view=diff
==============================================================================
--- devicemap/trunk/clients/2.0/reference/src/Main.java (original)
+++ devicemap/trunk/clients/2.0/reference/src/Main.java Wed Jul 29 18:04:04 2015
@@ -83,6 +83,7 @@ public class Main {
if(testString != null) {
Main.log("Test string: '" + testString + "'");
+ String result = client.classify(testString);
}
if(failure) {