Author: joern
Date: Tue Oct 20 11:39:00 2015
New Revision: 1709573
URL: http://svn.apache.org/viewvc?rev=1709573&view=rev
Log:
OPENNLP-822 The model now always includes the default name finder configuration
when trained without one.
Added:
opennlp/trunk/opennlp-tools/src/main/resources/opennlp/tools/namefind/
opennlp/trunk/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java?rev=1709573&r1=1709572&r2=1709573&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderFactory.java
Tue Oct 20 11:39:00 2015
@@ -18,6 +18,7 @@
package opennlp.tools.namefind;
import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
@@ -48,6 +49,7 @@ public class TokenNameFinderFactory exte
*/
public TokenNameFinderFactory() {
this.seqCodec = new BioCodec();
+ featureGeneratorBytes = loadDefaultFeatureGeneratorBytes(); // no descriptor is passed to this constructor, so use ner-default-features.xml from the classpath
}
public TokenNameFinderFactory(byte[] featureGeneratorBytes, final
Map<String, Object> resources,
@@ -59,8 +61,35 @@ public class TokenNameFinderFactory exte
this.featureGeneratorBytes = featureGeneratorBytes;
this.resources = resources;
this.seqCodec = seqCodec;
+
+ if (this.featureGeneratorBytes == null) {
+ this.featureGeneratorBytes = loadDefaultFeatureGeneratorBytes();
+ }
}
+ /**
+  * Reads the default name finder feature generator descriptor,
+  * {@code /opennlp/tools/namefind/ner-default-features.xml}, from the
+  * classpath and returns its content.
+  *
+  * @return the raw bytes of the default descriptor
+  * @throws IllegalStateException if the resource is not on the classpath
+  *     or reading it fails (the {@link IOException} is attached as cause)
+  */
+ private static byte[] loadDefaultFeatureGeneratorBytes() {
+
+   ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+   try (InputStream in = TokenNameFinderFactory.class.getResourceAsStream(
+       "/opennlp/tools/namefind/ner-default-features.xml")) {
+
+     // getResourceAsStream returns null when the resource cannot be found.
+     if (in == null) {
+       throw new IllegalStateException("Classpath must contain ner-default-features.xml file!");
+     }
+
+     byte[] buf = new byte[1024];
+     int len;
+     while ((len = in.read(buf)) > 0) {
+       bytes.write(buf, 0, len);
+     }
+   }
+   catch (IOException e) {
+     // Pass the IOException along as the cause so the root failure is not lost.
+     throw new IllegalStateException("Failed reading from ner-default-features.xml file on classpath!", e);
+   }
+
+   return bytes.toByteArray();
+ }
+
/** Returns the sequence codec stored in this factory's {@code seqCodec} field. */
protected SequenceCodec<String> getSequenceCodec() {
return seqCodec;
}
Added:
opennlp/trunk/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml?rev=1709573&view=auto
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
(added)
+++
opennlp/trunk/opennlp-tools/src/main/resources/opennlp/tools/namefind/ner-default-features.xml
Tue Oct 20 11:39:00 2015
@@ -0,0 +1,36 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<!-- Default name finder feature generator configuration -->
+<generators>
+ <cache>
+ <generators>
+ <window prevLength = "2" nextLength = "2">
+ <tokenclass/>
+ </window>
+ <window prevLength = "2" nextLength = "2">
+ <token/>
+ </window>
+ <definition/>
+ <prevmap/>
+ <bigram/>
+ <sentence begin="true" end="false"/>
+ </generators>
+ </cache>
+</generators>
\ No newline at end of file