Used Apache CXF WebClient in place of Apache HttpClient for the NLTKNERecogniser REST calls
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/14ca3204 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/14ca3204 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/14ca3204 Branch: refs/heads/master Commit: 14ca32045361918f5dd28c63c9692accbcfa31d5 Parents: 892beca Author: manali <[email protected]> Authored: Sat Feb 6 17:00:38 2016 -0800 Committer: manali <[email protected]> Committed: Sat Feb 6 17:00:38 2016 -0800 ---------------------------------------------------------------------- .../apache/tika/parser/ner/NERecogniser.java | 2 ++ .../tika/parser/ner/nltk/NLTKNERecogniser.java | 34 +++++++------------- .../parser/ner/nltk/NLTKNERecogniserTest.java | 7 ++-- 3 files changed, 18 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/14ca3204/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java index c4693eb..3bebff2 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/NERecogniser.java @@ -36,6 +36,8 @@ public interface NERecogniser { String DATE = "DATE"; String PERCENT = "PERCENT"; String MONEY = "MONEY"; + String FACILITY = "FACILITY"; + String GPE = "GPE"; /** * checks if this Named Entity recogniser is available for service http://git-wip-us.apache.org/repos/asf/tika/blob/14ca3204/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java 
b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java index cb152f3..99cde6f 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java @@ -41,6 +41,13 @@ import org.apache.http.client.methods.HttpPost; import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.message.BasicNameValuePair; +import javax.ws.rs.core.Form; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +import org.apache.cxf.jaxrs.client.WebClient; +import org.apache.cxf.jaxrs.ext.multipart.ContentDisposition; +import org.apache.cxf.jaxrs.ext.multipart.MultipartBody; /** * This class offers an implementation of {@link NERecogniser} based on @@ -73,13 +80,8 @@ public class NLTKNERecogniser implements NERecogniser { public NLTKNERecogniser(){ try { String url = "http://localhost:5000/"; - HttpClient client = HttpClientBuilder.create().build(); - HttpGet get = new HttpGet(url); - - // add header - get.setHeader("User-Agent", USER_AGENT); - HttpResponse response = client.execute(get); - int responseCode = response.getStatusLine().getStatusCode(); + Response response = WebClient.create(url).accept(MediaType.TEXT_HTML).get(); + int responseCode = response.getStatus(); if(responseCode == 200){ available = true; } @@ -118,22 +120,10 @@ public class NLTKNERecogniser implements NERecogniser { Map<String, Set<String>> entities = new HashMap<>(); try { String url = "http://localhost:5000/nltk"; - HttpClient client = HttpClientBuilder.create().build(); - HttpPost post = new HttpPost(url); - post.setHeader("User-Agent", USER_AGENT); - List<NameValuePair> urlParameters = new ArrayList<NameValuePair>(); - urlParameters.add(new BasicNameValuePair("text", text)); - post.setEntity(new UrlEncodedFormEntity(urlParameters)); - - HttpResponse response = client.execute(post); - - int responseCode = 
response.getStatusLine().getStatusCode(); + Response response = WebClient.create(url).accept(MediaType.TEXT_HTML).form(new Form().param("text",text)); + int responseCode = response.getStatus(); if (responseCode == 200) { - BufferedReader rd = new BufferedReader( - new InputStreamReader(response.getEntity().getContent())); - - String result = rd.readLine(); - + String result = response.readEntity(String.class); JSONParser parser = new JSONParser(); JSONObject j = (JSONObject) parser.parse(result); JSONArray aa = new JSONArray(); http://git-wip-us.apache.org/repos/asf/tika/blob/14ca3204/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java index ac04066..563e836 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java @@ -21,6 +21,7 @@ import org.apache.tika.Tika; import org.apache.tika.config.TikaConfig; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ner.NamedEntityParser; +import org.junit.Ignore; import org.junit.Test; import java.io.ByteArrayInputStream; @@ -34,7 +35,6 @@ import static org.junit.Assert.assertTrue; public class NLTKNERecogniserTest { @Test public void testGetEntityTypes() throws Exception { - String text = "America"; System.setProperty(NamedEntityParser.SYS_PROP_NER_IMPL, NLTKNERecogniser.class.getName()); @@ -42,9 +42,10 @@ public class NLTKNERecogniserTest { Metadata md = new Metadata(); tika.parse(new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8)), md); - Set<String> gpe = new HashSet<>(Arrays.asList(md.getValues("NER_GPE"))); - if(gpe.size() == 0) return; + if(gpe.size() == 0) { + return; + } 
else { assertTrue(gpe.contains("America")); assertTrue(gpe.size() == 1); //and nothing else
