Thanks, Dave. I think you forgot the default config file?

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Chris Mattmann, Ph.D.
Chief Architect
Instrument Software and Science Data Systems Section (398)
NASA Jet Propulsion Laboratory Pasadena, CA 91109 USA
Office: 168-519, Mailstop: 168-527
Email: [email protected]
WWW:  http://sunset.usc.edu/~mattmann/
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adjunct Associate Professor, Computer Science Department
University of Southern California, Los Angeles, CA 90089 USA
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++






-----Original Message-----
From: "[email protected]" <[email protected]>
Reply-To: "[email protected]" <[email protected]>
Date: Sunday, November 16, 2014 at 6:37 PM
To: "[email protected]" <[email protected]>
Subject: svn commit: r1640017 -
/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java

>Author: dmeikle
>Date: Sun Nov 16 17:37:30 2014
>New Revision: 1640017
>
>URL: http://svn.apache.org/r1640017
>Log:
>TIKA-1476 - Updated TesseractOCRConfig to read from property file if
>present on classpath
>
>Modified:
>    
>tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/Tesseract
>OCRConfig.java
>
>Modified: 
>tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/Tesseract
>OCRConfig.java
>URL: 
>http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apa
>che/tika/parser/ocr/TesseractOCRConfig.java?rev=1640017&r1=1640016&r2=1640
>017&view=diff
>==========================================================================
>====
>--- 
>tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/Tesseract
>OCRConfig.java (original)
>+++ 
>tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/Tesseract
>OCRConfig.java Sun Nov 16 17:37:30 2014
>@@ -17,7 +17,10 @@
> package org.apache.tika.parser.ocr;
> 
> import java.io.File;
>+import java.io.IOException;
>+import java.io.InputStream;
> import java.io.Serializable;
>+import java.util.Properties;
> 
> /**
>  * Configuration for TesseractOCRParser.
>@@ -28,7 +31,11 @@ import java.io.Serializable;
>  * config.setTesseractPath(tesseractFolder);<br>
>  * parseContext.set(TesseractOCRConfig.class, config);<br>
>  * </p>
>- * 
>+ *
>+ * Parameters can also be set by creating the
>TesseractOCRConfig.properties file
>+ * and placing it in the package org/apache/tika/parser/ocr on the
>classpath.  An
>+ * example file can be found in the test resources folder:
>+ * 
><code>tika-parsers/src/test/resources/test-properties/TesseractOCRConfig-f
>ull.properties</code>.
>  * 
>  */
> public class TesseractOCRConfig implements Serializable{
>@@ -52,7 +59,58 @@ public class TesseractOCRConfig implemen
>       
>       // Maximum time (seconds) to wait for the ocring process termination
>       private int timeout = 120;
>-      
>+
>+      /**
>+       * Default constructor.
>+       */
>+      public TesseractOCRConfig() {
>+              
>init(this.getClass().getResourceAsStream("TesseractOCRConfig.properties
>"));
>+      }
>+
>+      /**
>+       * Loads properties from InputStream and then tries to close
>InputStream.
>+       * If there is an IOException, this silently swallows the exception
>+       * and goes back to the default.
>+       *
>+       * @param is
>+       */
>+      public TesseractOCRConfig(InputStream is) {
>+              init(is);
>+      }
>+
>+      private void init(InputStream is) {
>+              if (is == null) {
>+                      return;
>+              }
>+              Properties props = new Properties();
>+              try {
>+                      props.load(is);
>+              } catch (IOException e) {
>+              } finally {
>+                      if (is != null) {
>+                              try {
>+                                      is.close();
>+                              } catch (IOException e) {
>+                                      //swallow
>+                              }
>+                      }
>+              }
>+
>+              setTesseractPath(
>+                              getProp(props, "tesseractPath", 
>getTesseractPath()));
>+              setLanguage(
>+                              getProp(props, "language", getLanguage()));
>+              setPageSegMode(
>+                              getProp(props, "pageSegMode", 
>getPageSegMode()));
>+              setMinFileSizeToOcr(
>+                              getProp(props, "minFileSizeToOcr", 
>getMinFileSizeToOcr()));
>+              setMaxFileSizeToOcr(
>+                              getProp(props, "maxFileSizeToOcr", 
>getMaxFileSizeToOcr()));
>+              setTimeout(
>+                              getProp(props, "timeout", getTimeout()));
>+
>+      }
>+
>       /** @see #setTesseractPath(String tesseractPath)*/
>       public String getTesseractPath() {
>               return tesseractPath;
>@@ -62,7 +120,7 @@ public class TesseractOCRConfig implemen
>        * Set tesseract installation folder, needed if it is not on system
>path.
>        */
>       public void setTesseractPath(String tesseractPath) {
>-              if(!tesseractPath.endsWith(File.separator))
>+              if(!tesseractPath.isEmpty() && 
>!tesseractPath.endsWith(File.separator))
>                       tesseractPath += File.separator;
>               
>               this.tesseractPath = tesseractPath;
>@@ -132,5 +190,34 @@ public class TesseractOCRConfig implemen
>       public int getTimeout() {
>               return timeout;
>       }
>-      
>+
>+      /**
>+       * Get property from the properties file passed in.
>+       * @param properties properties file to read from.
>+       * @param property the property to fetch.
>+       * @param defaultMissing default parameter to use.
>+       * @return the value.
>+       */
>+      private int getProp(Properties properties, String property, int
>defaultMissing) {
>+              String p = properties.getProperty(property);
>+              if (p == null || p.isEmpty()){
>+                      return defaultMissing;
>+              }
>+              try {
>+                      return Integer.parseInt(p);
>+              } catch (Throwable ex) {
>+                      throw new RuntimeException(String.format("Cannot parse
>TesseractOCRConfig variable $s, invalid integer value", property), ex);
>+              }
>+      }
>+
>+      /**
>+       * Get property from the properties file passed in.
>+       * @param properties properties file to read from.
>+       * @param property the property to fetch.
>+       * @param defaultMissing default parameter to use.
>+       * @return the value.
>+       */
>+      private String getProp(Properties properties, String property, String
>defaultMissing) {
>+              return properties.getProperty(property, defaultMissing);
>+      }
> }
>
>

Reply via email to