Fix for TIKA-1876, contributed by manalishah.
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/a13369b0
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/a13369b0
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/a13369b0

Branch: refs/heads/master
Commit: a13369b098bea09421e35023c131adc092dcb6e4
Parents: 6c595fb
Author: manali <[email protected]>
Authored: Fri Feb 26 18:21:15 2016 -0800
Committer: manali <[email protected]>
Committed: Fri Feb 26 18:21:15 2016 -0800

----------------------------------------------------------------------
 .../org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java | 9 ---------
 .../org/apache/tika/parser/ner/nltk/NLTKServer.properties | 2 +-
 .../apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java | 2 +-
 3 files changed, 2 insertions(+), 11 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tika/blob/a13369b0/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
index eddddcb..1edfe28 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
@@ -45,15 +45,6 @@ public class NLTKNERecogniser implements NERecogniser {
      * some common entities identified by NLTK
      */
     public static final Set<String> ENTITY_TYPES = new HashSet<String>(){{
-        add(PERSON);
-        add(TIME);
-        add(LOCATION);
-        add(ORGANIZATION);
-        add(MONEY);
-        add(PERCENT);
-        add(DATE);
-        add(FACILITY);
-        add(GPE);
         add("NAMES");
     }};
 

http://git-wip-us.apache.org/repos/asf/tika/blob/a13369b0/tika-parsers/src/main/resources/org/apache/tika/parser/ner/nltk/NLTKServer.properties
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/resources/org/apache/tika/parser/ner/nltk/NLTKServer.properties b/tika-parsers/src/main/resources/org/apache/tika/parser/ner/nltk/NLTKServer.properties
index 24f5a2e..5909b69 100644
--- a/tika-parsers/src/main/resources/org/apache/tika/parser/ner/nltk/NLTKServer.properties
+++ b/tika-parsers/src/main/resources/org/apache/tika/parser/ner/nltk/NLTKServer.properties
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-nltk.server.url=http://localhost:5000
+nltk.server.url=http://localhost:8881

http://git-wip-us.apache.org/repos/asf/tika/blob/a13369b0/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
index a40ec24..5c1307f 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
@@ -36,7 +36,7 @@ import static org.junit.Assert.assertTrue;
 public class NLTKNERecogniserTest {
     @Test
     public void testGetEntityTypes() throws Exception {
-        String text = "America";
+        String text = "America is a big country.";
         System.setProperty(NamedEntityParser.SYS_PROP_NER_IMPL, NLTKNERecogniser.class.getName());
         Tika tika = new Tika(new TikaConfig(NamedEntityParser.class.getResourceAsStream("tika-config.xml")));
         Metadata md = new Metadata();
