Repository: tika
Updated Branches:
  refs/heads/master a35320069 -> 2d06bc2c3


read all entities from NLTKRest


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/ab09b0c6
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/ab09b0c6
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/ab09b0c6

Branch: refs/heads/master
Commit: ab09b0c6b3fc76f414dcaa464a2dbb58f52e0875
Parents: d184e9b
Author: manali <[email protected]>
Authored: Tue Apr 12 00:32:22 2016 -0700
Committer: manali <[email protected]>
Committed: Tue Apr 12 00:32:22 2016 -0700

----------------------------------------------------------------------
 .../tika/parser/ner/nltk/NLTKNERecogniser.java  | 26 +++++++++-----------
 1 file changed, 12 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/ab09b0c6/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
index 5407189..7b474eb 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
@@ -23,12 +23,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.Collection;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.Properties;
+import java.util.*;
 import javax.ws.rs.core.MediaType;
 import javax.ws.rs.core.Response;
 
@@ -38,7 +33,7 @@ import org.apache.cxf.jaxrs.client.WebClient;
  *  This class offers an implementation of {@link NERecogniser} based on
  *  ne_chunk() module of NLTK. This NER requires additional setup,
  *  due to Http requests to an endpoint server that runs NLTK.
- *  See <a href="http://wiki.apache.org/tika/TikaAndNER#NLTK">
+ *  See <a href="http://wiki.apache.org/tika/TikaAndNLTK">
  *
  */
 public class NLTKNERecogniser implements NERecogniser {
@@ -47,7 +42,7 @@ public class NLTKNERecogniser implements NERecogniser {
     private static boolean available = false;
     private static final String NLTK_REST_HOST = "http://localhost:8881";
     private String restHostUrlStr;
-     /**
+    /**
      * some common entities identified by NLTK
      */
     public static final Set<String> ENTITY_TYPES = new HashSet<String>(){{
@@ -71,9 +66,6 @@ public class NLTKNERecogniser implements NERecogniser {
                 this.restHostUrlStr = restHostUrlStr;
             }
 
-
-
-
             Response response = WebClient.create(restHostUrlStr).accept(MediaType.TEXT_HTML).get();
             int responseCode = response.getStatus();
             if(responseCode == 200){
@@ -127,14 +119,20 @@ public class NLTKNERecogniser implements NERecogniser {
                 String result = response.readEntity(String.class);
                 JSONParser parser = new JSONParser();
                 JSONObject j = (JSONObject) parser.parse(result);
-                Set s = entities.put("NAMES", new HashSet((Collection) j.get("names")));
+                Iterator<?> keys = j.keySet().iterator();
+                while( keys.hasNext() ) {
+                    String key = (String)keys.next();
+                    if ( !key.equals("result") ) {
+                        ENTITY_TYPES.add(key);
+                        entities.put(key.toUpperCase(Locale.ENGLISH), new HashSet((Collection) j.get(key)));
+                    }
+                }
             }
         }
         catch (Exception e) {
             LOG.debug(e.getMessage(), e);
         }
-        ENTITY_TYPES.clear();
-        ENTITY_TYPES.addAll(entities.keySet());
+
         return entities;
     }
 

Reply via email to