Author: pkluegl
Date: Wed Nov 6 10:03:05 2019
New Revision: 1869448
URL: http://svn.apache.org/viewvc?rev=1869448&view=rev
Log:
UIMA-6092: fixed bug in lookup, dictRemoveWS default set to true
Added:
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java
Modified:
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java
Modified:
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
URL:
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java?rev=1869448&r1=1869447&r2=1869448&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java Wed Nov 6 10:03:05 2019
@@ -390,8 +390,8 @@ public class RutaEngine extends JCasAnno
*/
public static final String PARAM_DICT_REMOVE_WS = "dictRemoveWS";
- @ConfigurationParameter(name = PARAM_DICT_REMOVE_WS, mandatory = false, defaultValue = "false")
- private Boolean dictRemoveWS = false;
+ @ConfigurationParameter(name = PARAM_DICT_REMOVE_WS, mandatory = false, defaultValue = "true")
+ private Boolean dictRemoveWS = true;
/**
* If this parameter is set to any String value then this String/token is used to split columns in
Modified:
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
URL:
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java?rev=1869448&r1=1869447&r2=1869448&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java Wed Nov 6 10:03:05 2019
@@ -268,7 +268,7 @@ public class TreeWordList implements Rut
TextNode wsNode = pointer.getChildNode(' ');
if (ignoreWS && wsNode != null) {
- result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars,
+ result |= recursiveContains(wsNode, text, next - 1, ignoreCase, fragment, ignoreChars,
maxIgnoreChars, ignoreWS);
}
@@ -292,7 +292,7 @@ public class TreeWordList implements Rut
} else {
TextNode wsNode = pointer.getChildNode(' ');
if (ignoreWS && wsNode != null) {
- result |= recursiveContains(wsNode, text, --next, ignoreCase, fragment, ignoreChars,
+ result |= recursiveContains(wsNode, text, next - 1, ignoreCase, fragment, ignoreChars,
maxIgnoreChars, ignoreWS);
}
Modified:
uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml
URL:
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml?rev=1869448&r1=1869447&r2=1869448&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml (original)
+++ uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml Wed Nov 6 10:03:05 2019
@@ -336,6 +336,12 @@
<boolean>true</boolean>
</value>
</nameValuePair>
+ <nameValuePair>
+ <name>dictRemoveWS</name>
+ <value>
+ <boolean>true</boolean>
+ </value>
+ </nameValuePair>
<nameValuePair>
<name>inferenceVisitors</name>
<value>
Modified:
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java
URL:
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java?rev=1869448&r1=1869447&r2=1869448&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java (original)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkFastTest.java Wed Nov 6 10:03:05 2019
@@ -43,7 +43,7 @@ public class MarkFastTest {
RutaTestUtils.assertAnnotationsEquals(cas, 1, 3, "1 0 0", "100", "2 0 0");
RutaTestUtils.assertAnnotationsEquals(cas, 2, 0);
- RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "100");
+ RutaTestUtils.assertAnnotationsEquals(cas, 3, 3, "1 0 0", "100", "2 0 0");
RutaTestUtils.assertAnnotationsEquals(cas, 4, 2, "1 0 0", "2 0 0");
}
Modified:
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java
URL:
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java?rev=1869448&r1=1869447&r2=1869448&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java (original)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/ResourcesFromDataPathTest.java Wed Nov 6 10:03:05 2019
@@ -66,8 +66,8 @@ public class ResourcesFromDataPathTest {
ae.process(cas);
RutaTestUtils.assertAnnotationsEquals(cas, 1, 3, "1 0 0", "100", "2 0 0");
- RutaTestUtils.assertAnnotationsEquals(cas, 2, 0);
+ RutaTestUtils.assertAnnotationsEquals(cas, 2, 3, "1 0 0", "100", "2 0 0");
RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "100");
- RutaTestUtils.assertAnnotationsEquals(cas, 4, 2, "1 0 0", "2 0 0");
+ RutaTestUtils.assertAnnotationsEquals(cas, 4, 1, "100");
}
}
Added:
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java
URL:
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java?rev=1869448&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java (added)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/resource/TreeWordListTest.java Wed Nov 6 10:03:05 2019
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.ruta.resource;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.ruta.FilterManager;
+import org.apache.uima.ruta.RutaStream;
+import org.apache.uima.ruta.engine.Ruta;
+import org.apache.uima.ruta.engine.RutaTestUtils;
+import org.apache.uima.ruta.seed.TextSeeder;
+import org.apache.uima.ruta.type.RutaBasic;
+import org.apache.uima.ruta.visitor.InferenceCrowd;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TreeWordListTest {
+
+ @Test
+ public void testWithAction() throws Exception {
+
+ String text = "ab";
+ String script = "STRINGLIST list = {\"ab\", \"a c\", \"a d\"};";
+ script += "MARKFAST(T1, list);";
+
+ CAS cas = RutaTestUtils.getCAS(text);
+ Ruta.apply(cas, script);
+
+ RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, text);
+ }
+
+ @Test
+ public void testFind() throws Exception {
+
+ String text = "ab";
+ List<String> data = Arrays.asList(text, "a c", "a d");
+ TreeWordList twl = new TreeWordList(data, false);
+
+ JCas jcas = JCasFactory.createJCas();
+ jcas.setDocumentText(text);
+ CAS cas = jcas.getCas();
+ RutaStream stream = createStream(text, cas);
+
+ List<AnnotationFS> result1 = twl.find(stream, false, 0, null, 0, false);
+ Assert.assertEquals(1, result1.size());
+ Assert.assertEquals(text, result1.get(0).getCoveredText());
+
+ List<AnnotationFS> result2 = twl.find(stream, false, 0, null, 0, true);
+ Assert.assertEquals(1, result2.size());
+ Assert.assertEquals(text, result2.get(0).getCoveredText());
+
+ List<AnnotationFS> result3 = twl.find(stream, true, 0, null, 0, false);
+ Assert.assertEquals(1, result3.size());
+ Assert.assertEquals(text, result3.get(0).getCoveredText());
+ }
+
+ private RutaStream createStream(String text, CAS cas) {
+ Type basicType = cas.getTypeSystem().getType(RutaBasic.class.getName());
+
+ Collection<Type> filterTypes = getDefaultFilterTypes(cas);
+
+ FilterManager filter = new FilterManager(filterTypes, true, cas);
+ TextSeeder seeder = new TextSeeder();
+ seeder.seed(text, cas);
+ InferenceCrowd crowd = new InferenceCrowd(new ArrayList<>());
+ RutaStream stream = new RutaStream(cas, basicType, filter, false, false, true, null, crowd);
+ stream.initalizeBasics(new String[] { CAS.TYPE_NAME_ANNOTATION }, false);
+ return stream;
+ }
+
+ private Collection<Type> getDefaultFilterTypes(CAS cas) {
+ Collection<Type> filterTypes = new ArrayList<Type>();
+ TypeSystem typeSystem = cas.getTypeSystem();
+ String[] defaultFilteredTypes = new String[] { "org.apache.uima.ruta.type.SPACE",
+ "org.apache.uima.ruta.type.BREAK", "org.apache.uima.ruta.type.MARKUP" };
+ for (String each : defaultFilteredTypes) {
+ Type type = typeSystem.getType(each);
+ if (type != null) {
+ filterTypes.add(type);
+ }
+ }
+ return filterTypes;
+ }
+
+}
\ No newline at end of file