This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_1x in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_1x by this push: new d1bc093 fix potential resource leak, continued d1bc093 is described below commit d1bc09386405d28d6b0f0a29ce8c3e7efd72d6c7 Author: tballison <talli...@mitre.org> AuthorDate: Mon Apr 16 20:13:57 2018 -0400 fix potential resource leak, continued --- .../apache/tika/parser/ocr/TesseractOCRConfig.java | 16 +++---- .../tika/parser/ocr/TesseractOCRConfigTest.java | 7 +++- .../apache/tika/server/resource/TikaResource.java | 27 ++++++++---- .../org/apache/tika/server/TikaResourceTest.java | 49 ++++++++++++++-------- 4 files changed, 64 insertions(+), 35 deletions(-) diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java index c4d8958..ba00c20 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java @@ -47,10 +47,10 @@ public class TesseractOCRConfig implements Serializable { private static final long serialVersionUID = -4861942486845757891L; private static Pattern ALLOWABLE_PAGE_SEPARATORS_PATTERN = - Pattern.compile("(?i)^[-_<>A-Z0-9 \n\r]+$"); + Pattern.compile("(?i)^[-_/\\.A-Z0-9]+$"); private static Pattern ALLOWABLE_OTHER_PARAMS_PATTERN = - Pattern.compile("(?i)^[-/_<>A-Z0-9]+$"); + Pattern.compile("(?i)^[-_/\\.A-Z0-9]+$"); public enum OUTPUT_TYPE { TXT, @@ -283,7 +283,7 @@ public class TesseractOCRConfig implements Serializable { throw new IllegalArgumentException(pageSeparator + " contains illegal characters.\n"+ "If you trust this value, set it with setTrustedPageSeparator"); } - this.pageSeparator = pageSeparator; + setTrustedPageSeparator(pageSeparator); } /** @@ -456,11 +456,13 @@ public class TesseractOCRConfig implements Serializable { * Deafult value is gray. */ public void setColorspace(String colorspace) { - if (!colorspace.equals(null)) { - this.colorspace = colorspace; - } else { + if (colorspace == null) { throw new IllegalArgumentException("Colorspace value cannot be null."); } + if (! colorspace.matches("(?i)^[-_A-Z0-9]+$")) { + throw new IllegalArgumentException("colorspace must match this pattern: (?i)^[-_A-Z0-9]+$"); + } + this.colorspace = colorspace; } /** @@ -575,7 +577,7 @@ public class TesseractOCRConfig implements Serializable { Matcher m = ALLOWABLE_OTHER_PARAMS_PATTERN.matcher(key); if (! m.find()) { - throw new IllegalArgumentException("Value contains illegal characters: "+key); + throw new IllegalArgumentException("Key contains illegal characters: "+key); } m.reset(value); if (! m.find()) { diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java index e201aa2..7517f7b 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRConfigTest.java @@ -216,10 +216,15 @@ public class TesseractOCRConfigTest extends TikaTest { @Test public void testBogusPathCheck() { + //allow path that doesn't actually exist TesseractOCRConfig config = new TesseractOCRConfig(); config.setTesseractPath("blahdeblahblah"); assertEquals("blahdeblahblah", config.getTesseractPath()); } - + @Test(expected=IllegalArgumentException.class) + public void testBadColorSpace() { + TesseractOCRConfig config = new TesseractOCRConfig(); + config.setColorspace("someth!ng"); + } } diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java index d8795f7..b7f857e 100644 --- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java +++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java @@ -85,7 +85,7 @@ import org.xml.sax.SAXException; @Path("/tika") public class TikaResource { - private static Pattern ALLOWABLE_HEADER_CHARS = Pattern.compile("(?i)^[-_<>A-Z0-9 ]+$"); + private static Pattern ALLOWABLE_HEADER_CHARS = Pattern.compile("(?i)^[-/_\\.A-Z0-9 ]+$"); public static final String GREETING = "This is Tika Server (" + new Tika().toString() + "). Please PUT\n"; public static final String X_TIKA_OCR_HEADER_PREFIX = "X-Tika-OCR"; @@ -196,11 +196,10 @@ public class TikaResource { */ private static void processHeaderConfig(MultivaluedMap<String, String> httpHeaders, Object object, String key, String prefix) { - try { - String property = StringUtils.removeStart(key, prefix); + try {String property = StringUtils.removeStart(key, prefix); Field field = null; try { - object.getClass().getDeclaredField(StringUtils.uncapitalize(property)); + field = object.getClass().getDeclaredField(StringUtils.uncapitalize(property)); } catch (NoSuchFieldException e) { //swallow } @@ -211,12 +210,20 @@ public class TikaResource { //try that. Class clazz = String.class; if (field != null) { - if (field.getType() == int.class) { + if (field.getType() == int.class || field.getType() == Integer.class) { clazz = int.class; } else if (field.getType() == double.class) { clazz = double.class; + } else if (field.getType() == Double.class) { + clazz = Double.class; + } else if (field.getType() == float.class) { + clazz = float.class; + } else if (field.getType() == Float.class) { + clazz = Float.class; } else if (field.getType() == boolean.class) { clazz = boolean.class; + } else if (field.getType() == Boolean.class) { + clazz = Boolean.class; } } @@ -233,14 +240,16 @@ public class TikaResource { if (clazz == String.class) { checkTrustWorthy(setter, val); m.invoke(object, val); - } else if (clazz == int.class) { + } else if (clazz == int.class || clazz == Integer.class) { m.invoke(object, Integer.parseInt(val)); - } else if (clazz == double.class) { + } else if (clazz == double.class || clazz == Double.class) { m.invoke(object, Double.parseDouble(val)); - } else if (clazz == boolean.class) { + } else if (clazz == boolean.class || clazz == Boolean.class) { m.invoke(object, Boolean.parseBoolean(val)); + } else if (clazz == float.class || clazz == Float.class) { + m.invoke(object, Float.parseFloat(val)); } else { - throw new IllegalArgumentException("setter must be String, int, double or boolean...for now"); + throw new IllegalArgumentException("setter must be String, int, float, double or boolean...for now"); } } else { throw new NoSuchMethodException("Couldn't find: "+setter); diff --git a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java index bd86fd8..295ce74 100644 --- a/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java +++ b/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java @@ -17,15 +17,6 @@ package org.apache.tika.server; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import javax.ws.rs.core.Response; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; - import org.apache.cxf.jaxrs.JAXRSServerFactoryBean; import org.apache.cxf.jaxrs.client.WebClient; import org.apache.cxf.jaxrs.ext.multipart.Attachment; @@ -35,6 +26,15 @@ import org.apache.tika.parser.ocr.TesseractOCRParser; import org.apache.tika.server.resource.TikaResource; import org.junit.Test; +import javax.ws.rs.core.Response; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + public class TikaResourceTest extends CXFTestBase { public static final String TEST_DOC = "test.doc"; public static final String TEST_PASSWORD_PROTECTED = "password.xls"; @@ -304,14 +304,27 @@ public class TikaResourceTest extends CXFTestBase { @Test public void testTrustedMethodPrevention() { - Response response = WebClient.create(endPoint + TIKA_PATH) - .type("application/pdf") - .accept("text/plain") - .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX + - "trustedPageSeparator", - "\u0010") - .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf")); - assertEquals(500, response.getStatus()); + Response response = WebClient.create(endPoint + TIKA_PATH) + .type("application/pdf") + .accept("text/plain") + .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX + + "trustedPageSeparator", + "\u0010") + .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf")); + assertEquals(500, response.getStatus()); - } + } + + @Test + public void testFloatInHeader() { + Response response = WebClient.create(endPoint + TIKA_PATH) + .type("application/pdf") + .accept("text/plain") + .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX + + "averageCharTolerance", + "2.0") + .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf")); + assertEquals(200, response.getStatus()); + + } } -- To stop receiving notification emails like this one, please contact talli...@apache.org.