Repository: opennlp
Updated Branches:
  refs/heads/master e515ff474 -> 08e163ca5


OPENNLP-1092: Fix pos model serialization bug


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/08e163ca
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/08e163ca
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/08e163ca

Branch: refs/heads/master
Commit: 08e163ca5f07db9ad9460a686b7e5085b12d9128
Parents: e515ff4
Author: Jörn Kottmann <[email protected]>
Authored: Mon Jun 26 16:20:09 2017 +0200
Committer: Jörn Kottmann <[email protected]>
Committed: Wed Jun 28 11:06:58 2017 +0200

----------------------------------------------------------------------
 .../java/opennlp/tools/postag/POSModel.java     |   9 +-
 .../namefind/TokenNameFinderModelTest.java      | 104 +++++++++++++++++++
 .../opennlp/tools/namefind/ner-pos-features.xml |  36 +++++++
 3 files changed, 148 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/08e163ca/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
index 95a41a8..d55921c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
@@ -35,6 +35,8 @@ import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.BaseModel;
 import opennlp.tools.util.model.ByteArraySerializer;
+import opennlp.tools.util.model.POSModelSerializer;
+import opennlp.tools.util.model.SerializableArtifact;
 
 /**
  * The {@link POSModel} is the model used
@@ -42,7 +44,7 @@ import opennlp.tools.util.model.ByteArraySerializer;
  *
  * @see POSTaggerME
  */
-public final class POSModel extends BaseModel {
+public final class POSModel extends BaseModel implements SerializableArtifact {
 
   private static final String COMPONENT_NAME = "POSTaggerME";
   static final String POS_MODEL_ENTRY_NAME = "pos.model";
@@ -178,4 +180,9 @@ public final class POSModel extends BaseModel {
       return getFactory().getDictionary();
     return null;
   }
+
+  @Override
+  public Class<POSModelSerializer> getArtifactSerializerClass() {
+    return POSModelSerializer.class;
+  }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/08e163ca/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderModelTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderModelTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderModelTest.java
new file mode 100644
index 0000000..9d58993
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderModelTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.namefind;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSTaggerMETest;
+import opennlp.tools.util.MockInputStreamFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelType;
+
+public class TokenNameFinderModelTest {
+
+  @Test
+  public void testNERWithPOSModel() throws IOException {
+
+    // create a resources folder
+    Path resourcesFolder = Files.createTempDirectory("resources").toAbsolutePath();
+
+    // save a POS model there
+    POSModel posModel = POSTaggerMETest.trainPOSModel(ModelType.MAXENT);
+    File posModelFile = new File(resourcesFolder.toFile(),"pos-model.bin");
+    FileOutputStream fos = new FileOutputStream(posModelFile);
+
+    posModel.serialize(posModelFile);
+
+    Assert.assertTrue(posModelFile.exists());
+
+    // load feature generator xml bytes
+    InputStream fgInputStream = this.getClass().getResourceAsStream("ner-pos-features.xml");
+    BufferedReader buffers = new BufferedReader(new InputStreamReader(fgInputStream));
+    String featureGeneratorString = buffers.lines().
+        collect(Collectors.joining("\n"));
+
+    // create a featuregenerator file
+    Path featureGenerator = Files.createTempFile("ner-featuregen", ".xml");
+    Files.write(featureGenerator, featureGeneratorString.getBytes());
+
+
+    Map<String, Object> resources;
+    try {
+      resources = TokenNameFinderTrainerTool.loadResources(resourcesFolder.toFile(),
+          featureGenerator.toAbsolutePath().toFile());
+    }
+    catch (IOException e) {
+      throw new TerminateToolException(-1, e.getMessage(), e);
+    }
+
+
+    // train a name finder
+    ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
+        new PlainTextByLineStream(new MockInputStreamFactory(
+            new File("opennlp/tools/namefind/voa1.train")), "UTF-8"));
+
+    TrainingParameters params = new TrainingParameters();
+    params.put(TrainingParameters.ITERATIONS_PARAM, 70);
+    params.put(TrainingParameters.CUTOFF_PARAM, 1);
+
+    TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream,
+        params, TokenNameFinderFactory.create(null,
+            featureGeneratorString.getBytes(), resources, new BioCodec()));
+
+
+    File model = File.createTempFile("nermodel", ".bin");
+    FileOutputStream modelOut = new FileOutputStream(model);
+    nameFinderModel.serialize(modelOut);
+
+    modelOut.close();
+
+    Assert.assertTrue(model.exists());
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/08e163ca/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
new file mode 100644
index 0000000..7600e38
--- /dev/null
+++ b/opennlp-tools/src/test/resources/opennlp/tools/namefind/ner-pos-features.xml
@@ -0,0 +1,36 @@
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License. You may obtain a copy of the License at
+  ~
+  ~     http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+
+<generators>
+  <cache>
+    <generators>
+      <window prevLength = "2" nextLength = "2">
+        <tokenclass/>
+      </window>
+      <window prevLength = "2" nextLength = "2">
+        <token/>
+      </window>
+      <window prevLength = "2" nextLength = "2">
+        <tokenpos model="pos-model.bin"/>
+      </window>
+      <definition/>
+      <prevmap/>
+      <bigram/>
+      <sentence begin="true" end="false"/>
+    </generators>
+  </cache>
+</generators>
\ No newline at end of file

Reply via email to