OPENNLP-964: Ignore LICENSE, NOTICE and README files in the model
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/b41fcd69 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/b41fcd69 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/b41fcd69 Branch: refs/heads/parser_regression Commit: b41fcd69baef80ed1e99656e9a3b7424aa294bb8 Parents: a2049d6 Author: Jörn Kottmann <jo...@apache.org> Authored: Thu Feb 2 19:13:02 2017 +0100 Committer: Jörn Kottmann <jo...@apache.org> Committed: Sun Apr 16 19:24:52 2017 +0200 ---------------------------------------------------------------------- .../tagdict/MorfologikPOSTaggerFactory.java | 15 +------ .../tools/namefind/TokenNameFinderModel.java | 14 +----- .../opennlp/tools/util/model/BaseModel.java | 2 + .../tools/util/model/ByteArraySerializer.java | 33 ++++++++++++++ .../util/model/ByteArraySerializerTest.java | 45 ++++++++++++++++++++ 5 files changed, 82 insertions(+), 27 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java ---------------------------------------------------------------------- diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java index 370b4d0..592ef7d 100644 --- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java +++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java @@ -22,7 +22,6 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.Map; @@ -33,7 +32,7 @@ import opennlp.tools.dictionary.Dictionary; import opennlp.tools.postag.POSTaggerFactory; import opennlp.tools.postag.TagDictionary; import opennlp.tools.util.model.ArtifactSerializer; -import opennlp.tools.util.model.ModelUtil; +import opennlp.tools.util.model.ByteArraySerializer; public class MorfologikPOSTaggerFactory extends POSTaggerFactory { @@ -150,16 +149,4 @@ public class MorfologikPOSTaggerFactory extends POSTaggerFactory { info)); return new MorfologikTagDictionary(dict); } - - static class ByteArraySerializer implements ArtifactSerializer<byte[]> { - - public byte[] create(InputStream in) throws IOException { - return ModelUtil.read(in); - } - - public void serialize(byte[] artifact, OutputStream out) throws IOException { - out.write(artifact); - } - } - } http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java index 05a3615..09eefc5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java @@ -21,7 +21,6 @@ package opennlp.tools.namefind; import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.OutputStream; import java.net.URL; import java.util.Map; import java.util.Properties; @@ -36,7 +35,7 @@ import opennlp.tools.util.featuregen.BrownCluster; import opennlp.tools.util.featuregen.WordClusterDictionary; import opennlp.tools.util.model.ArtifactSerializer; import opennlp.tools.util.model.BaseModel; -import opennlp.tools.util.model.ModelUtil; +import opennlp.tools.util.model.ByteArraySerializer; /** * The {@link TokenNameFinderModel} is the model used @@ -53,17 +52,6 @@ public class TokenNameFinderModel extends BaseModel { } } - private static class ByteArraySerializer implements ArtifactSerializer<byte[]> { - - public byte[] create(InputStream in) throws IOException { - return ModelUtil.read(in); - } - - public void serialize(byte[] artifact, OutputStream out) throws IOException { - out.write(artifact); - } - } - private static final String COMPONENT_NAME = "NameFinderME"; private static final String MAXENT_MODEL_ENTRY_NAME = "nameFinder.model"; http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java index 062c787..20acd9d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java @@ -351,6 +351,8 @@ public abstract class BaseModel implements ArtifactProvider, Serializable { GenericModelSerializer.register(serializers); PropertiesSerializer.register(serializers); DictionarySerializer.register(serializers); + serializers.put("txt", new ByteArraySerializer()); + serializers.put("html", new ByteArraySerializer()); return serializers; } http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-tools/src/main/java/opennlp/tools/util/model/ByteArraySerializer.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/ByteArraySerializer.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/ByteArraySerializer.java new file mode 100644 index 0000000..aa123c4 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/ByteArraySerializer.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.model; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +public class ByteArraySerializer implements ArtifactSerializer<byte[]> { + + public byte[] create(InputStream in) throws IOException { + return ModelUtil.read(in); + } + + public void serialize(byte[] artifact, OutputStream out) throws IOException { + out.write(artifact); + } +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/b41fcd69/opennlp-tools/src/test/java/opennlp/tools/util/model/ByteArraySerializerTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/model/ByteArraySerializerTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/model/ByteArraySerializerTest.java new file mode 100644 index 0000000..a0d7a35 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/model/ByteArraySerializerTest.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util.model; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.Random; + +import org.junit.Assert; +import org.junit.Test; + +public class ByteArraySerializerTest { + + @Test + public void testSerialization() throws IOException { + + byte[] b = new byte[1024]; + new Random(23).nextBytes(b); + + ByteArraySerializer serializer = new ByteArraySerializer(); + + ByteArrayOutputStream bOut = new ByteArrayOutputStream(); + serializer.serialize(Arrays.copyOf(b, b.length), bOut) ; + + Assert.assertArrayEquals(b, bOut.toByteArray()); + Assert.assertArrayEquals(b, serializer.create(new ByteArrayInputStream(b))); + } +}