Repository: tika
Updated Branches:
  refs/heads/2.x 2a7e52ec4 -> 573527bbc


TIKA-1978: Invocation of java.net.URL.equals(Object), which blocks to do domain name resolution, in org.apache.tika.parser.geo.topic.GeoParser.initialize(URL) (2.x branch)


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/bd3ecfcd
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/bd3ecfcd
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/bd3ecfcd

Branch: refs/heads/2.x
Commit: bd3ecfcddeaf13262e477ba29c5256ebd44e32db
Parents: 43e3000
Author: Lewis John McGibbney <[email protected]>
Authored: Thu May 26 11:15:02 2016 -0700
Committer: Lewis John McGibbney <[email protected]>
Committed: Thu May 26 11:15:02 2016 -0700

----------------------------------------------------------------------
 .../apache/tika/parser/geo/topic/GeoParser.java | 43 +++++++++++---------
 .../tika/parser/geo/topic/GeoParserConfig.java  |  4 +-
 .../apache/tika/parser/geo/topic/GeoTag.java    | 33 +++++++--------
 .../parser/geo/topic/NameEntityExtractor.java   | 11 ++---
 4 files changed, 48 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/bd3ecfcd/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java
index eaef6ad..303f878 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoParser.java
@@ -20,19 +20,21 @@ package org.apache.tika.parser.geo.topic;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 import java.util.Set;
+import java.util.logging.Level;
 import java.util.logging.Logger;
 
 import org.apache.commons.exec.CommandLine;
 import org.apache.commons.exec.DefaultExecutor;
-import org.apache.commons.exec.ExecuteException;
 import org.apache.commons.exec.ExecuteWatchdog;
 import org.apache.commons.exec.PumpStreamHandler;
-import org.apache.commons.exec.environment.EnvironmentUtils;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -57,7 +59,7 @@ public class GeoParser extends AbstractParser {
 
     private boolean initialized;
     private URL modelUrl;
-    private NameEntityExtractor extractor;
+    private transient NameEntityExtractor extractor;
     private boolean available;
 
     @Override
@@ -70,9 +72,12 @@ public class GeoParser extends AbstractParser {
      * @param modelUrl the URL to NER model
      */
     public void initialize(URL modelUrl) {
-        if (this.modelUrl != null && this.modelUrl.equals(modelUrl)) {
-            // Previously initialized for the same URL, no initialization needed
-            return;
+        try {
+            if (this.modelUrl != null && this.modelUrl.toURI().equals(modelUrl.toURI())) {
+                return;
+            }
+        } catch (URISyntaxException e1) {
+            LOG.log(Level.SEVERE, e1.getMessage(), e1);
         }
         
         this.modelUrl = modelUrl;
@@ -112,7 +117,7 @@ public class GeoParser extends AbstractParser {
         String bestner = extractor.bestNameEntity;
 
         /*------------------------resolve geonames for each ner, store results in a hashmap---------------------*/
-        HashMap<String, ArrayList<String>> resolvedGeonames = searchGeoNames(locationNameEntities);
+        HashMap<String, ArrayList<String>> resolvedGeonames = (HashMap<String, ArrayList<String>>) searchGeoNames(locationNameEntities);
 
         /*----------------store locationNameEntities and their geonames in a geotag, each input has one geotag---------------------*/
         GeoTag geotag = new GeoTag();
@@ -120,22 +125,21 @@ public class GeoParser extends AbstractParser {
 
         /* add resolved entities in metadata */
 
-        metadata.add("Geographic_NAME", geotag.Geographic_NAME);
-        metadata.add("Geographic_LONGITUDE", geotag.Geographic_LONGTITUDE);
-        metadata.add("Geographic_LATITUDE", geotag.Geographic_LATITUDE);
+        metadata.add("Geographic_NAME", geotag.geoNAME);
+        metadata.add("Geographic_LONGITUDE", geotag.geoLONGTITUDE);
+        metadata.add("Geographic_LATITUDE", geotag.geoLATITUDE);
         for (int i = 0; i < geotag.alternatives.size(); ++i) {
-            GeoTag alter = (GeoTag) geotag.alternatives.get(i);
-            metadata.add("Optional_NAME" + (i + 1), alter.Geographic_NAME);
+            GeoTag alter = geotag.alternatives.get(i);
+            metadata.add("Optional_NAME" + (i + 1), alter.geoNAME);
             metadata.add("Optional_LONGITUDE" + (i + 1),
-                         alter.Geographic_LONGTITUDE);
+                         alter.geoLONGTITUDE);
             metadata.add("Optional_LATITUDE" + (i + 1),
-                         alter.Geographic_LATITUDE);
+                         alter.geoLATITUDE);
         }
     }
 
-    public HashMap<String, ArrayList<String>> searchGeoNames(
-            ArrayList<String> locationNameEntities) throws ExecuteException,
-            IOException {
+    public Map<String, ArrayList<String>> searchGeoNames(
+            List<String> locationNameEntities) throws IOException {
         CommandLine cmdLine = new CommandLine("lucene-geo-gazetteer");
         ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
         cmdLine.addArgument("-s");
@@ -150,17 +154,16 @@ public class GeoParser extends AbstractParser {
         exec.setWatchdog(watchdog);
         PumpStreamHandler streamHandler = new PumpStreamHandler(outputStream);
         exec.setStreamHandler(streamHandler);
-        int exitValue = exec.execute(cmdLine, EnvironmentUtils.getProcEnvironment());
         String outputJson = outputStream.toString("UTF-8");
         JSONArray json = (JSONArray) JSONValue.parse(outputJson);
 
-        HashMap<String, ArrayList<String>> returnHash = new HashMap<String, ArrayList<String>>();
+        HashMap<String, ArrayList<String>> returnHash = new HashMap<>();
         for (int i = 0; i < json.size(); i++) {
             JSONObject obj = (JSONObject) json.get(i);
             for (Object key : obj.keySet()) {
                 String theKey = (String) key;
                 JSONArray vals = (JSONArray) obj.get(theKey);
-                ArrayList<String> stringVals = new ArrayList<String>(
+                ArrayList<String> stringVals = new ArrayList<>(
                         vals.size());
                 for (int j = 0; j < vals.size(); j++) {
                     String val = (String) vals.get(j);

http://git-wip-us.apache.org/repos/asf/tika/blob/bd3ecfcd/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoParserConfig.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoParserConfig.java b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoParserConfig.java
index 305e663..56272e1 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoParserConfig.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoParserConfig.java
@@ -30,7 +30,7 @@ public class GeoParserConfig implements Serializable {
         this.nerModelUrl = GeoParserConfig.class.getResource("en-ner-location.bin");
     }
 
-    public void setNERModelPath(String path) {
+    public void setNERModelPath(String path) throws MalformedURLException {
         if (path == null)
             return;
         File file = new File(path);
@@ -40,7 +40,7 @@ public class GeoParserConfig implements Serializable {
         try {
             this.nerModelUrl = file.toURI().toURL();
         } catch (MalformedURLException e) {
-            throw new RuntimeException(e);
+            throw new MalformedURLException(e.getMessage());
         }
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/bd3ecfcd/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoTag.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoTag.java b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoTag.java
index bccaef1..fe4b9c6 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoTag.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/GeoTag.java
@@ -18,18 +18,19 @@
 package org.apache.tika.parser.geo.topic;
 
 import java.util.ArrayList;
-import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
 
 public class GeoTag {
-       String Geographic_NAME;
-       String Geographic_LONGTITUDE;
-       String Geographic_LATITUDE;
-       ArrayList<GeoTag> alternatives = new ArrayList<GeoTag>();
+       String geoNAME;
+       String geoLONGTITUDE;
+       String geoLATITUDE;
+       ArrayList<GeoTag> alternatives = new ArrayList<>();
 
        public void setMain(String name, String longitude, String latitude) {
-               Geographic_NAME = name;
-               Geographic_LONGTITUDE = longitude;
-               Geographic_LATITUDE = latitude;
+               geoNAME = name;
+               geoLONGTITUDE = longitude;
+               geoLATITUDE = latitude;
        }
 
        public void addAlternative(GeoTag geotag) {
@@ -44,20 +45,20 @@ public class GeoTag {
         * @param bestNER best name entity among all the extracted entities for the
         * input stream
         */
-       public void toGeoTag(HashMap<String, ArrayList<String>> resolvedGeonames,
+       public void toGeoTag(Map<String, ArrayList<String>> resolvedGeonames,
                        String bestNER) {
 
-               for (String key : resolvedGeonames.keySet()) {
+               for (Entry<String, ArrayList<String>> key : resolvedGeonames.entrySet()) {
                        ArrayList<String> cur = resolvedGeonames.get(key);
                        if (key.equals(bestNER)) {
-                               this.Geographic_NAME = cur.get(0);
-                               this.Geographic_LONGTITUDE = cur.get(1);
-                               this.Geographic_LATITUDE = cur.get(2);
+                               this.geoNAME = cur.get(0);
+                               this.geoLONGTITUDE = cur.get(1);
+                               this.geoLATITUDE = cur.get(2);
                        } else {
                                GeoTag alter = new GeoTag();
-                               alter.Geographic_NAME = cur.get(0);
-                               alter.Geographic_LONGTITUDE = cur.get(1);
-                               alter.Geographic_LATITUDE = cur.get(2);
+                               alter.geoNAME = cur.get(0);
+                               alter.geoLONGTITUDE = cur.get(1);
+                               alter.geoLATITUDE = cur.get(2);
                                this.addAlternative(alter);
                        }
                }

http://git-wip-us.apache.org/repos/asf/tika/blob/bd3ecfcd/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
index 3c6f0e8..822d343 100644
--- a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
+++ b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/geo/topic/NameEntityExtractor.java
@@ -43,11 +43,11 @@ public class NameEntityExtractor {
     private final NameFinderME nameFinder;
 
     public NameEntityExtractor(URL modelUrl) throws IOException {
-        this.locationNameEntities = new ArrayList<String>();
+        this.locationNameEntities = new ArrayList<>();
         this.bestNameEntity = null;
         TokenNameFinderModel model = new TokenNameFinderModel(modelUrl);
         this.nameFinder = new NameFinderME(model);
-        this.tf = new HashMap<String, Integer>();
+        this.tf = new HashMap<>();
     }
 
     /*
@@ -59,7 +59,7 @@ public class NameEntityExtractor {
      */
     public void getAllNameEntitiesfromInput(InputStream stream) throws IOException {
         String[] in = IOUtils.toString(stream, UTF_8).split(" ");
-        Span nameE[];
+        Span[] nameE;
         
         //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind
         synchronized (nameFinder) {
@@ -89,7 +89,7 @@ public class NameEntityExtractor {
      * ArrayList
      */
     public void getBestNameEntity() {
-        if (this.locationNameEntities.size() == 0)
+        if (this.locationNameEntities.isEmpty())
             return;
 
         for (int i = 0; i < this.locationNameEntities.size(); ++i) {
@@ -100,10 +100,11 @@ public class NameEntityExtractor {
                 tf.put(this.locationNameEntities.get(i), 1);
         }
         int max = 0;
-        List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(
+        List<Map.Entry<String, Integer>> list = new ArrayList<>(
                 tf.entrySet());
         Collections.shuffle(list);
         Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
+            @Override
             public int compare(Map.Entry<String, Integer> o1,
                     Map.Entry<String, Integer> o2) {
                 // Descending Order

Reply via email to