Repository: tika Updated Branches: refs/heads/master a35320069 -> 2d06bc2c3
read all entities from NLTKRest Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/ab09b0c6 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/ab09b0c6 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/ab09b0c6 Branch: refs/heads/master Commit: ab09b0c6b3fc76f414dcaa464a2dbb58f52e0875 Parents: d184e9b Author: manali <[email protected]> Authored: Tue Apr 12 00:32:22 2016 -0700 Committer: manali <[email protected]> Committed: Tue Apr 12 00:32:22 2016 -0700 ---------------------------------------------------------------------- .../tika/parser/ner/nltk/NLTKNERecogniser.java | 26 +++++++++----------- 1 file changed, 12 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/ab09b0c6/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java index 5407189..7b474eb 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java @@ -23,12 +23,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.util.Set; -import java.util.HashSet; -import java.util.Collection; -import java.util.Map; -import java.util.HashMap; -import java.util.Properties; +import java.util.*; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; @@ -38,7 +33,7 @@ import org.apache.cxf.jaxrs.client.WebClient; * This class offers an implementation of {@link NERecogniser} based on * ne_chunk() module of NLTK. This NER requires additional setup, * due to Http requests to an endpoint server that runs NLTK. - * See <a href="http://wiki.apache.org/tika/TikaAndNER#NLTK"> + * See <a href="http://wiki.apache.org/tika/TikaAndNLTK"> * */ public class NLTKNERecogniser implements NERecogniser { @@ -47,7 +42,7 @@ public class NLTKNERecogniser implements NERecogniser { private static boolean available = false; private static final String NLTK_REST_HOST = "http://localhost:8881"; private String restHostUrlStr; - /** + /** * some common entities identified by NLTK */ public static final Set<String> ENTITY_TYPES = new HashSet<String>(){{ @@ -71,9 +66,6 @@ public class NLTKNERecogniser implements NERecogniser { this.restHostUrlStr = restHostUrlStr; } - - - Response response = WebClient.create(restHostUrlStr).accept(MediaType.TEXT_HTML).get(); int responseCode = response.getStatus(); if(responseCode == 200){ @@ -127,14 +119,20 @@ public class NLTKNERecogniser implements NERecogniser { String result = response.readEntity(String.class); JSONParser parser = new JSONParser(); JSONObject j = (JSONObject) parser.parse(result); - Set s = entities.put("NAMES", new HashSet((Collection) j.get("names"))); + Iterator<?> keys = j.keySet().iterator(); + while( keys.hasNext() ) { + String key = (String)keys.next(); + if ( !key.equals("result") ) { + ENTITY_TYPES.add(key); + entities.put(key.toUpperCase(Locale.ENGLISH), new HashSet((Collection) j.get(key))); + } + } } } catch (Exception e) { LOG.debug(e.getMessage(), e); } - ENTITY_TYPES.clear(); - ENTITY_TYPES.addAll(entities.keySet()); + return entities; }
