updated with changes

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/0dbd69ce
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/0dbd69ce
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/0dbd69ce

Branch: refs/heads/master
Commit: 0dbd69cef5ec603a11d7f5b52f119e3bea1550b5
Parents: 7ebe007
Author: manali <[email protected]>
Authored: Tue Mar 1 00:59:11 2016 -0800
Committer: manali <[email protected]>
Committed: Tue Mar 1 00:59:11 2016 -0800

----------------------------------------------------------------------
 .../tika/sax/RichTextContentHandlerTest.java    | 75 ++++++++++++++++++++
 .../tika/parser/ner/nltk/NLTKNERecogniser.java  | 16 +++--
 .../parser/ner/nltk/NLTKNERecogniserTest.java   |  2 +-
 3 files changed, 86 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/0dbd69ce/tika-core/src/test/java/org/apache/tika/sax/RichTextContentHandlerTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/sax/RichTextContentHandlerTest.java b/tika-core/src/test/java/org/apache/tika/sax/RichTextContentHandlerTest.java
new file mode 100644
index 0000000..257ea38
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/sax/RichTextContentHandlerTest.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayOutputStream;
+import java.io.OutputStreamWriter;
+import java.nio.charset.Charset;
+
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * Test cases for the {@link RichTextContentHandler} class.
+ */
+public class RichTextContentHandlerTest {
+
+    /**
+     * Test to check that a tags are detected and the rich text version is used.
+     */
+    @Test
+    public void aTagTest() throws Exception {
+        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(
+                new RichTextContentHandler(
+                    new OutputStreamWriter(buffer, Charset.defaultCharset())),
+                new Metadata());
+        xhtml.startDocument();
+        AttributesImpl attributes = new AttributesImpl();
+        attributes.addAttribute("", "", "name", "", "value");
+        xhtml.startElement("a", attributes);
+        xhtml.endDocument();
+
+        assertEquals("\n\n\n\n[bookmark: value]", 
buffer.toString(UTF_8.name()));
+    }
+
+    /**
+     * Test to check that img tags are detected and the rich text version is used.
+     */
+    @Test
+    public void imgTagTest() throws Exception {
+        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(
+                new RichTextContentHandler(
+                    new OutputStreamWriter(buffer, Charset.defaultCharset())),
+                new Metadata());
+        xhtml.startDocument();
+        AttributesImpl attributes = new AttributesImpl();
+        attributes.addAttribute("", "", "alt", "", "value");
+        xhtml.startElement("img", attributes);
+        xhtml.endDocument();
+
+        assertEquals("\n\n\n\n[image: value]", buffer.toString(UTF_8.name()));
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/tika/blob/0dbd69ce/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
index 1edfe28..5407189 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniser.java
@@ -22,8 +22,13 @@ import org.json.simple.parser.JSONParser;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.*;
-import java.util.*;
+import java.io.IOException;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Collection;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Properties;
 import javax.ws.rs.core.MediaType;
 import javax.ws.rs.core.Response;
 
@@ -41,6 +46,7 @@ public class NLTKNERecogniser implements NERecogniser {
     private static final Logger LOG = 
LoggerFactory.getLogger(NLTKNERecogniser.class);
     private static boolean available = false;
     private static final String NLTK_REST_HOST = "http://localhost:8881";;
+    private String restHostUrlStr;
      /**
      * some common entities identified by NLTK
      */
@@ -48,7 +54,7 @@ public class NLTKNERecogniser implements NERecogniser {
         add("NAMES");
     }};
 
-    String restHostUrlStr;
+
     public NLTKNERecogniser(){
         try {
 
@@ -59,8 +65,7 @@ public class NLTKNERecogniser implements NERecogniser {
                 e.printStackTrace();
             }
 
-            if (restHostUrlStr == null
-                    || (restHostUrlStr != null && restHostUrlStr.equals(""))) {
+            if (restHostUrlStr == null || restHostUrlStr.equals("")) {
                 this.restHostUrlStr = NLTK_REST_HOST;
             } else {
                 this.restHostUrlStr = restHostUrlStr;
@@ -115,7 +120,6 @@ public class NLTKNERecogniser implements NERecogniser {
     public Map<String, Set<String>> recognise(String text) {
         Map<String, Set<String>> entities = new HashMap<>();
         try {
-            int port = 8881;
             String url = restHostUrlStr + "/nltk";
             Response response = 
WebClient.create(url).accept(MediaType.TEXT_HTML).post(text);
             int responseCode = response.getStatus();

http://git-wip-us.apache.org/repos/asf/tika/blob/0dbd69ce/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
index 5c1307f..94d9a27 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/ner/nltk/NLTKNERecogniserTest.java
@@ -48,7 +48,7 @@ public class NLTKNERecogniserTest {
         }
         else {
             assertTrue(names.contains("America"));
-            assertTrue(names.size() == 1); //and nothing else
+            assertTrue(names.size() == 1); 
         }
     }
 }

Reply via email to