This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new af06e87  fix potential resource leak, continued
af06e87 is described below

commit af06e87f719f3c247d3da4da75e9c91dfbb25706
Author: tballison <talli...@mitre.org>
AuthorDate: Mon Apr 16 20:13:57 2018 -0400

    fix potential resource leak, continued
---
 .../apache/tika/parser/ocr/TesseractOCRConfig.java | 16 +++----
 .../tika/parser/ocr/TesseractOCRConfigTest.java    |  7 +++-
 .../apache/tika/server/resource/TikaResource.java  | 27 ++++++++----
 .../org/apache/tika/server/TikaResourceTest.java   | 49 ++++++++++++++--------
 4 files changed, 64 insertions(+), 35 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
index c4d8958..ba00c20 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
@@ -47,10 +47,10 @@ public class TesseractOCRConfig implements Serializable {
     private static final long serialVersionUID = -4861942486845757891L;
 
     private static Pattern ALLOWABLE_PAGE_SEPARATORS_PATTERN =
-            Pattern.compile("(?i)^[-_<>A-Z0-9 \n\r]+$");
+            Pattern.compile("(?i)^[-_/\\.A-Z0-9]+$");
 
     private static Pattern ALLOWABLE_OTHER_PARAMS_PATTERN =
-            Pattern.compile("(?i)^[-/_<>A-Z0-9]+$");
+            Pattern.compile("(?i)^[-_/\\.A-Z0-9]+$");
 
     public enum OUTPUT_TYPE {
         TXT,
@@ -283,7 +283,7 @@ public class TesseractOCRConfig implements Serializable {
             throw new IllegalArgumentException(pageSeparator + " contains illegal characters.\n"+
             "If you trust this value, set it with setTrustedPageSeparator");
         }
-        this.pageSeparator = pageSeparator;
+        setTrustedPageSeparator(pageSeparator);
     }
 
     /**
@@ -456,11 +456,13 @@ public class TesseractOCRConfig implements Serializable {
      *                   Deafult value is gray.
      */
     public void setColorspace(String colorspace) {
-        if (!colorspace.equals(null)) {
-            this.colorspace = colorspace;
-        } else {
+        if (colorspace == null) {
             throw new IllegalArgumentException("Colorspace value cannot be null.");
         }
+        if (! colorspace.matches("(?i)^[-_A-Z0-9]+$")) {
+            throw new IllegalArgumentException("colorspace must match this pattern: (?i)^[-_A-Z0-9]+$");
+        }
+        this.colorspace = colorspace;
     }
 
     /**
@@ -575,7 +577,7 @@ public class TesseractOCRConfig implements Serializable {
 
         Matcher m = ALLOWABLE_OTHER_PARAMS_PATTERN.matcher(key);
         if (! m.find()) {
-            throw new IllegalArgumentException("Value contains illegal characters: "+key);
+            throw new IllegalArgumentException("Key contains illegal characters: "+key);
         }
         m.reset(value);
         if (! m.find()) {
diff --git 
a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
 
b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
index e201aa2..7517f7b 100644
--- 
a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
+++ 
b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java
@@ -216,10 +216,15 @@ public class TesseractOCRConfigTest extends TikaTest {
 
     @Test
     public void testBogusPathCheck() {
+        //allow path that doesn't actually exist
         TesseractOCRConfig config = new TesseractOCRConfig();
         config.setTesseractPath("blahdeblahblah");
         assertEquals("blahdeblahblah", config.getTesseractPath());
     }
 
-
+    @Test(expected=IllegalArgumentException.class)
+    public void testBadColorSpace() {
+        TesseractOCRConfig config = new TesseractOCRConfig();
+        config.setColorspace("someth!ng");
+    }
 }
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
index bd93f68..be91b93 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
@@ -83,7 +83,7 @@ import org.xml.sax.SAXException;
 @Path("/tika")
 public class TikaResource {
 
-    private static Pattern ALLOWABLE_HEADER_CHARS = Pattern.compile("(?i)^[-_<>A-Z0-9 ]+$");
+    private static Pattern ALLOWABLE_HEADER_CHARS = Pattern.compile("(?i)^[-/_\\.A-Z0-9 ]+$");
 
     public static final String GREETING = "This is Tika Server (" + new Tika().toString() + "). Please PUT\n";
     public static final String X_TIKA_OCR_HEADER_PREFIX = "X-Tika-OCR";
@@ -190,11 +190,10 @@ public class TikaResource {
      */
     private static void processHeaderConfig(MultivaluedMap<String, String> httpHeaders, Object object, String key, String prefix) {
 
-        try {
-            String property = StringUtils.removeStart(key, prefix);
+        try {String property = StringUtils.removeStart(key, prefix);
             Field field = null;
             try {
-                object.getClass().getDeclaredField(StringUtils.uncapitalize(property));
+                field = object.getClass().getDeclaredField(StringUtils.uncapitalize(property));
             } catch (NoSuchFieldException e) {
                 //swallow
             }
@@ -205,12 +204,20 @@ public class TikaResource {
             //try that.
             Class clazz = String.class;
             if (field != null) {
-                if (field.getType() == int.class) {
+                if (field.getType() == int.class || field.getType() == Integer.class) {
                     clazz = int.class;
                 } else if (field.getType() == double.class) {
                     clazz = double.class;
+                } else if (field.getType() == Double.class) {
+                    clazz = Double.class;
+                } else if (field.getType() == float.class) {
+                    clazz = float.class;
+                } else if (field.getType() == Float.class) {
+                    clazz = Float.class;
                 } else if (field.getType() == boolean.class) {
                     clazz = boolean.class;
+                } else if (field.getType() == Boolean.class) {
+                    clazz = Boolean.class;
                 }
             }
 
@@ -227,14 +234,16 @@ public class TikaResource {
                 if (clazz == String.class) {
                     checkTrustWorthy(setter, val);
                     m.invoke(object, val);
-                } else if (clazz == int.class) {
+                } else if (clazz == int.class || clazz == Integer.class) {
                     m.invoke(object, Integer.parseInt(val));
-                } else if (clazz == double.class) {
+                } else if (clazz == double.class || clazz == Double.class) {
                     m.invoke(object, Double.parseDouble(val));
-                } else if (clazz == boolean.class) {
+                } else if (clazz == boolean.class || clazz == Boolean.class) {
                     m.invoke(object, Boolean.parseBoolean(val));
+                } else if (clazz == float.class || clazz == Float.class) {
+                    m.invoke(object, Float.parseFloat(val));
                 } else {
-                    throw new IllegalArgumentException("setter must be String, int, double or boolean...for now");
+                    throw new IllegalArgumentException("setter must be String, int, float, double or boolean...for now");
                 }
             } else {
                 throw new NoSuchMethodException("Couldn't find: "+setter);
diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
index bd86fd8..295ce74 100644
--- a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
@@ -17,15 +17,6 @@
 
 package org.apache.tika.server;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import javax.ws.rs.core.Response;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
 import org.apache.cxf.jaxrs.client.WebClient;
 import org.apache.cxf.jaxrs.ext.multipart.Attachment;
@@ -35,6 +26,15 @@ import org.apache.tika.parser.ocr.TesseractOCRParser;
 import org.apache.tika.server.resource.TikaResource;
 import org.junit.Test;
 
+import javax.ws.rs.core.Response;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
 public class TikaResourceTest extends CXFTestBase {
     public static final String TEST_DOC = "test.doc";
     public static final String TEST_PASSWORD_PROTECTED = "password.xls";
@@ -304,14 +304,27 @@ public class TikaResourceTest extends CXFTestBase {
 
     @Test
     public void testTrustedMethodPrevention() {
-            Response response = WebClient.create(endPoint + TIKA_PATH)
-                    .type("application/pdf")
-                    .accept("text/plain")
-                    .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX +
-                                    "trustedPageSeparator",
-                            "\u0010")
-                    .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
-            assertEquals(500, response.getStatus());
+        Response response = WebClient.create(endPoint + TIKA_PATH)
+                .type("application/pdf")
+                .accept("text/plain")
+                .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX +
+                                "trustedPageSeparator",
+                        "\u0010")
+                .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
+        assertEquals(500, response.getStatus());
 
-        }
+    }
+
+    @Test
+    public void testFloatInHeader() {
+        Response response = WebClient.create(endPoint + TIKA_PATH)
+                .type("application/pdf")
+                .accept("text/plain")
+                .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX +
+                                "averageCharTolerance",
+                        "2.0")
+                .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
+        assertEquals(200, response.getStatus());
+
+    }
 }

-- 
To stop receiving notification emails like this one, please contact
talli...@apache.org.

Reply via email to