Author: joern
Date: Wed Sep 23 14:43:45 2015
New Revision: 1704862
URL: http://svn.apache.org/viewvc?rev=1704862&view=rev
Log:
OPENNLP-818 Added external resource dependency support to the Dictionary Name
Finder. Thanks to Peter Thygesen for providing a patch!
Added:
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/dictionary/
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/dictionary/DictionaryResource.java
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/dictionary/DictionaryResourceImpl.java
opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/dictionary/
opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java
opennlp/trunk/opennlp-uima/src/test/resources/cas/dictionary-test.xmi
(with props)
opennlp/trunk/opennlp-uima/src/test/resources/dictionary.dic (with props)
opennlp/trunk/opennlp-uima/src/test/resources/test-descriptors/DictionaryNameFinder.xml
(with props)
Modified:
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java
Added:
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/dictionary/DictionaryResource.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/dictionary/DictionaryResource.java?rev=1704862&view=auto
==============================================================================
---
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/dictionary/DictionaryResource.java
(added)
+++
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/dictionary/DictionaryResource.java
Wed Sep 23 14:43:45 2015
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.dictionary;
+
+import opennlp.tools.dictionary.Dictionary;
+
+/**
+ * Gives access to a {@link Dictionary}.
+ *
+ * <p>{@code DictionaryNameFinder} casts the object it obtains from
+ * {@code context.getResourceObject(...)} to this type, so implementations
+ * are what a descriptor binds as the dictionary resource.
+ */
+public interface DictionaryResource {
+
+  /**
+   * Returns the dictionary held by this resource.
+   *
+   * @return the {@link Dictionary} provided by this resource
+   */
+  Dictionary getDictionary();
+}
Added:
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/dictionary/DictionaryResourceImpl.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/dictionary/DictionaryResourceImpl.java?rev=1704862&view=auto
==============================================================================
---
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/dictionary/DictionaryResourceImpl.java
(added)
+++
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/dictionary/DictionaryResourceImpl.java
Wed Sep 23 14:43:45 2015
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.dictionary;
+
+import opennlp.tools.dictionary.Dictionary;
+import opennlp.uima.util.AbstractModelResource;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * {@link DictionaryResource} backed by {@link AbstractModelResource}: the
+ * dictionary is parsed from a stream by {@link #loadModel(InputStream)} and
+ * handed out through {@link #getDictionary()}.
+ */
+public class DictionaryResourceImpl extends AbstractModelResource<Dictionary>
+    implements DictionaryResource {
+
+  /**
+   * Builds a {@link Dictionary} from the supplied stream.
+   *
+   * @param in stream the dictionary is read from
+   * @return the dictionary created from {@code in}
+   * @throws IOException declared by the overridden method; raised if the
+   *         dictionary cannot be read
+   */
+  @Override
+  protected Dictionary loadModel(InputStream in) throws IOException {
+    final Dictionary dictionary = new Dictionary(in);
+    return dictionary;
+  }
+
+  /**
+   * Returns the inherited {@code model} field.
+   * NOTE(review): presumably the base class stores the result of
+   * {@link #loadModel(InputStream)} there - confirm in AbstractModelResource.
+   *
+   * @return the current value of {@code model}
+   */
+  @Override
+  public Dictionary getDictionary() {
+    return this.model;
+  }
+}
Modified:
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java?rev=1704862&r1=1704861&r2=1704862&view=diff
==============================================================================
---
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java
(original)
+++
opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/DictionaryNameFinder.java
Wed Sep 23 14:43:45 2015
@@ -21,11 +21,13 @@ import java.io.IOException;
import java.io.InputStream;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.util.Span;
+import opennlp.uima.dictionary.DictionaryResource;
import opennlp.uima.util.AnnotatorUtil;
import opennlp.uima.util.ExceptionMessages;
import opennlp.uima.util.UimaUtil;
import org.apache.uima.cas.CAS;
+import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;
public class DictionaryNameFinder extends AbstractNameFinder {
@@ -47,29 +49,37 @@ public class DictionaryNameFinder extend
*
* Note: Do all initialization in this method, do not use the constructor.
*/
- public void initialize()
- throws ResourceInitializationException {
+  public void initialize() throws ResourceInitializationException {
Dictionary nameFinderDictionary;
try {
- String modelName = AnnotatorUtil.getRequiredStringParameter(context,
- UimaUtil.DICTIONARY_PARAMETER);
- InputStream inModel = AnnotatorUtil
- .getResourceAsStream(context, modelName);
- nameFinderDictionary = new Dictionary(inModel);
- } catch (IOException e) {
- throw new ResourceInitializationException(
- ExceptionMessages.MESSAGE_CATALOG,
- ExceptionMessages.IO_ERROR_DICTIONARY_READING,
- new Object[] {e.getMessage()});
+      // Preferred source: a dictionary bound as a UIMA external resource.
+      DictionaryResource modelResource = (DictionaryResource) context
+          .getResourceObject(UimaUtil.DICTIONARY_PARAMETER);
+
+      // getResourceObject() returns null (it does not throw) when nothing is
+      // bound to the key, so guard the dereference; a null result selects the
+      // legacy lookup below instead of failing with a NullPointerException.
+      nameFinderDictionary =
+          modelResource != null ? modelResource.getDictionary() : null;
+    } catch (ResourceAccessException e) {
+      // Deliberately not rethrown: an inaccessible resource also falls back
+      // to the legacy parameter-based lookup below.
+      nameFinderDictionary = null;
+    }
+
+    if (nameFinderDictionary == null) {
+      // Legacy configuration: the parameter value names the dictionary to load.
+      String modelName = AnnotatorUtil.getRequiredStringParameter(context,
+          UimaUtil.DICTIONARY_PARAMETER);
+
+      // try-with-resources closes the stream on success and on failure.
+      try (InputStream inModel = AnnotatorUtil.getResourceAsStream(context,
+          modelName)) {
+        nameFinderDictionary = new Dictionary(inModel);
+      } catch (IOException ie) {
+        // Pass the IOException along as the cause so its stack trace survives.
+        throw new ResourceInitializationException(
+            ExceptionMessages.MESSAGE_CATALOG,
+            ExceptionMessages.IO_ERROR_DICTIONARY_READING,
+            new Object[] { ie.getMessage() }, ie);
+      }
}
- mNameFinder =
- new opennlp.tools.namefind.DictionaryNameFinder(nameFinderDictionary);
+    mNameFinder = new opennlp.tools.namefind.DictionaryNameFinder(
+        nameFinderDictionary);
}
protected Span[] find(CAS cas, String[] tokens) {
Added:
opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java?rev=1704862&view=auto
==============================================================================
---
opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java
(added)
+++
opennlp/trunk/opennlp-uima/src/test/java/opennlp/uima/dictionary/DictionaryResourceTest.java
Wed Sep 23 14:43:45 2015
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.dictionary;
+
+import opennlp.tools.util.StringList;
+import opennlp.uima.util.CasUtil;
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceSpecifier;
+import org.apache.uima.util.InvalidXMLException;
+import org.apache.uima.util.XMLInputSource;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.junit.Assert.*;
+
+public class DictionaryResourceTest {
+
+ private static final String PATHNAME =
"opennlp-uima/src/test/resources/test-descriptors/";
+
+ private static AnalysisEngine AE;
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ AE = produceAE("DictionaryNameFinder.xml");
+ }
+
+ @AfterClass
+ public static void afterClass() {
+ AE.destroy(); // is this necessary?
+ }
+
+ @Test
+ public void testDictionaryWasLoaded() {
+
+ try {
+ DictionaryResource dic = (DictionaryResource) AE.getResourceManager()
+ .getResource("/opennlp.uima.Dictionary");
+ // simple check if ordering always is the same...
+ assertEquals(
+ "[[Berlin], [Stockholm], [New,York], [London], [Copenhagen],
[Paris]]",
+ dic.getDictionary().toString());
+ // else we can do a simple test like this
+ assertEquals("There should be six entries in the dictionary", 6,
+ dic.getDictionary().asStringSet().size());
+ assertTrue("London should be in the dictionary",
+ dic.getDictionary().contains(new StringList("London")));
+ } catch (Exception e) {
+ fail("Dictionary was not loaded.");
+ }
+
+ }
+
+ @Test
+ public void testDictionaryNameFinder() {
+
+ Set<String> expectedLocations = new HashSet<>();
+ Collections.addAll(expectedLocations, "London", "Stockholm", "Copenhagen",
+ "New York");
+
+ try {
+ CAS cas = AE.newCAS();
+ CasUtil.deserializeXmiCAS(cas, DictionaryResourceTest.class
+ .getResourceAsStream("/cas/dictionary-test.xmi"));
+ AE.process(cas);
+ Type locationType = cas.getTypeSystem().getType("opennlp.uima.Location");
+ FSIterator<AnnotationFS> locationIterator = cas
+ .getAnnotationIndex(locationType).iterator();
+
+ while (locationIterator.isValid()) {
+ AnnotationFS annotationFS = locationIterator.get();
+ assertTrue(expectedLocations.contains(annotationFS.getCoveredText()));
+ expectedLocations.remove(annotationFS.getCoveredText());
+ locationIterator.moveToNext();
+ }
+ assertEquals(0, expectedLocations.size());
+ } catch (Exception e) {
+ e.printStackTrace();
+ fail(e.getLocalizedMessage());
+ }
+
+ }
+
+ private static AnalysisEngine produceAE(String descName)
+ throws IOException, InvalidXMLException, ResourceInitializationException
{
+ File descFile = new File(PATHNAME + descName);
+ XMLInputSource in = new XMLInputSource(descFile);
+ ResourceSpecifier specifier = UIMAFramework.getXMLParser()
+ .parseResourceSpecifier(in);
+ return UIMAFramework.produceAnalysisEngine(specifier);
+ }
+
+}
Added: opennlp/trunk/opennlp-uima/src/test/resources/cas/dictionary-test.xmi
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/test/resources/cas/dictionary-test.xmi?rev=1704862&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
opennlp/trunk/opennlp-uima/src/test/resources/cas/dictionary-test.xmi
------------------------------------------------------------------------------
svn:mime-type = application/xml
Added: opennlp/trunk/opennlp-uima/src/test/resources/dictionary.dic
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/test/resources/dictionary.dic?rev=1704862&view=auto
==============================================================================
Binary file - no diff available.
Propchange: opennlp/trunk/opennlp-uima/src/test/resources/dictionary.dic
------------------------------------------------------------------------------
svn:mime-type = application/xml
Added:
opennlp/trunk/opennlp-uima/src/test/resources/test-descriptors/DictionaryNameFinder.xml
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/test/resources/test-descriptors/DictionaryNameFinder.xml?rev=1704862&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
opennlp/trunk/opennlp-uima/src/test/resources/test-descriptors/DictionaryNameFinder.xml
------------------------------------------------------------------------------
svn:mime-type = application/xml