Author: joern
Date: Tue Oct 20 11:39:00 2015
New Revision: 1709573

URL: http://svn.apache.org/viewvc?rev=1709573&view=rev
Log:
OPENNLP-822 The model now always includes the default name finder feature 
generator configuration when it is trained without a custom one.

Added:
    opennlp/trunk/opennlp-tools/src/main/resources/opennlp/tools/namefind/
    
opennlp/trunk/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
Modified:
    
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java

Modified: 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java?rev=1709573&r1=1709572&r2=1709573&view=diff
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
 (original)
+++ 
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
 Tue Oct 20 11:39:00 2015
@@ -18,6 +18,7 @@
 package opennlp.tools.namefind;
 
 import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Map;
@@ -48,6 +49,7 @@ public class TokenNameFinderFactory exte
    */
   public TokenNameFinderFactory() {
     this.seqCodec = new BioCodec();
+    featureGeneratorBytes = loadDefaultFeatureGeneratorBytes();
   }
 
   public TokenNameFinderFactory(byte[] featureGeneratorBytes, final 
Map<String, Object> resources,
@@ -59,8 +61,35 @@ public class TokenNameFinderFactory exte
     this.featureGeneratorBytes = featureGeneratorBytes;
     this.resources = resources;
     this.seqCodec = seqCodec;
+    
+    if (this.featureGeneratorBytes == null) {
+      this.featureGeneratorBytes = loadDefaultFeatureGeneratorBytes();
+    }
   }
 
+  /** Reads the bundled default feature generator descriptor from the classpath. */
+  private static byte[] loadDefaultFeatureGeneratorBytes() {
+    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+    try (InputStream in = TokenNameFinderFactory.class.getResourceAsStream(
+        "/opennlp/tools/namefind/ner-default-features.xml")) {
+      // getResourceAsStream signals a missing resource with null, not an exception.
+      if (in == null) {
+        throw new IllegalStateException("Classpath must contain ner-default-features.xml file!");
+      }
+      byte[] buf = new byte[1024];
+      int len;
+      while ((len = in.read(buf)) > 0) {
+        bytes.write(buf, 0, len);
+      }
+    }
+    catch (IOException e) {
+      // Pass the IOException along as the cause so the underlying failure stays visible.
+      throw new IllegalStateException("Failed reading from ner-default-features.xml file on classpath!", e);
+    }
+
+    return bytes.toByteArray();
+  }
+  
   protected SequenceCodec<String> getSequenceCodec() {
     return seqCodec;
   }

Added: 
opennlp/trunk/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
URL: 
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml?rev=1709573&view=auto
==============================================================================
--- 
opennlp/trunk/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
 (added)
+++ 
opennlp/trunk/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
 Tue Oct 20 11:39:00 2015
@@ -0,0 +1,36 @@
+<!--
+       Licensed to the Apache Software Foundation (ASF) under one
+       or more contributor license agreements.  See the NOTICE file
+       distributed with this work for additional information
+       regarding copyright ownership.  The ASF licenses this file
+       to you under the Apache License, Version 2.0 (the
+       "License"); you may not use this file except in compliance
+       with the License.  You may obtain a copy of the License at
+       
+       http://www.apache.org/licenses/LICENSE-2.0
+       
+       Unless required by applicable law or agreed to in writing,
+       software distributed under the License is distributed on an
+       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+       KIND, either express or implied.  See the License for the
+       specific language governing permissions and limitations
+       under the License.
+-->
+
+<!-- Default name finder feature generator configuration -->
+<generators>
+  <cache> 
+    <generators>
+      <window prevLength = "2" nextLength = "2">          
+        <tokenclass/>
+      </window>
+      <window prevLength = "2" nextLength = "2">                
+        <token/>
+      </window>
+      <definition/>
+      <prevmap/>
+      <bigram/>
+      <sentence begin="true" end="false"/>
+    </generators>
+  </cache> 
+</generators>
\ No newline at end of file


Reply via email to