updated Javadoc for Tesseract config and parser

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/6773d42d
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/6773d42d
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/6773d42d

Branch: refs/heads/master
Commit: 6773d42de77230dff621a1010ed37f0505dfa302
Parents: bc6667c
Author: Zarana Parekh <[email protected]>
Authored: Thu Jun 30 15:58:12 2016 -0700
Committer: Zarana Parekh <[email protected]>
Committed: Thu Jun 30 15:58:12 2016 -0700

----------------------------------------------------------------------
 .../tika/parser/ocr/TesseractOCRConfig.java     | 32 ++++++++++++--------
 .../tika/parser/ocr/TesseractOCRParser.java     | 13 +++++---
 2 files changed, 27 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/6773d42d/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
index 558a83d..101003f 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java
@@ -256,12 +256,14 @@ public class TesseractOCRConfig implements Serializable{
                this.timeout = timeout;
        }
 
-       /** @see #setTimeout(int timeout)*/
+       /** @see #setTimeout(int timeout)
+        * @return timeout value for Tesseract */
        public int getTimeout() {
                return timeout;
        }
        
-       /** @see #setEnableImageProcessing(boolean) */
+       /** @see #setEnableImageProcessing(boolean)
+        * @return image processing is enabled or not */
        public int isEnableImageProcessing() {
                return enableImageProcessing;
        }
@@ -282,12 +284,12 @@ public class TesseractOCRConfig implements Serializable{
        }
 
        /**
-        * @param density the density to set
+        * @param density the density to set. Valid range of values is 150-1200.
         * Default value is 300.
         */
        public void setDensity(int density) {
                if(density < 150 || density > 1200) {
-                       throw new IllegalArgumentException("Invalid density value");
+                       throw new IllegalArgumentException("Invalid density value. Valid range of values is 150-1200.");
                }
                this.density = density;
        }
@@ -300,7 +302,7 @@ public class TesseractOCRConfig implements Serializable{
        }
 
        /**
-        * @param depth the depth to set
+        * @param depth the depth to set. Valid values are 2, 4, 8, 16, 32, 64, 256, 4096.
         * Default value is 4.
         */
        public void setDepth(int depth) {
@@ -311,7 +313,7 @@ public class TesseractOCRConfig implements Serializable{
                                return;
                        }
                }
-               throw new IllegalArgumentException("Invalid depth value");
+               throw new IllegalArgumentException("Invalid depth value. Valid values are 2, 4, 8, 16, 32, 64, 256, 4096.");
        }
 
        /**
@@ -329,7 +331,7 @@ public class TesseractOCRConfig implements Serializable{
                if(!colorspace.equals(null)) {
                        this.colorspace = colorspace;
                } else {
-                       throw new IllegalArgumentException("Invalid colorspace value");
+                       throw new IllegalArgumentException("Colorspace value cannot be null.");
                }
        }
 
@@ -341,12 +343,13 @@ public class TesseractOCRConfig implements Serializable{
        }
 
        /**
-        * @param filter the filter to set
+        * @param filter the filter to set. Valid values are point, hermite, cubic, box, gaussian, catrom, triangle, quadratic and mitchell.
         * Default value is triangle.
         */
        public void setFilter(String filter) {
                if(filter.equals(null)) {
-                       throw new IllegalArgumentException("Invalid filter value");
+                       throw new IllegalArgumentException("Filter value cannot be null. Valid values are point, hermite, "
+                                       + "cubic, box, gaussian, catrom, triangle, quadratic and mitchell.");
                }
                
                String[] allowedFilters = {"Point", "Hermite", "Cubic", "Box", "Gaussian", "Catrom", "Triangle", "Quadratic", "Mitchell"};
@@ -356,7 +359,8 @@ public class TesseractOCRConfig implements Serializable{
                                return;
                        }
                }
-               throw new IllegalArgumentException("Invalid filter value");
+               throw new IllegalArgumentException("Invalid filter value. Valid values are point, hermite, "
+                                       + "cubic, box, gaussian, catrom, triangle, quadratic and mitchell.");
        }
 
        /**
@@ -367,7 +371,7 @@ public class TesseractOCRConfig implements Serializable{
        }
 
        /**
-        * @param resize the resize to set
+        * @param resize the resize to set. Valid range of values is 100-900.
         * Default value is 900.
         */
        public void setResize(int resize) {
@@ -377,10 +381,11 @@ public class TesseractOCRConfig implements Serializable{
                                return;
                        }
                }
-               throw new IllegalArgumentException("Invalid resize value");
+               throw new IllegalArgumentException("Invalid resize value. Valid range of values is 100-900.");
        }
 
-       /** @see #setImageMagickPath(String ImageMagickPath)*/
+       /** @see #setImageMagickPath(String ImageMagickPath)
+        * @return path to ImageMagick file. */
        public String getImageMagickPath() {
                
                return ImageMagickPath;
@@ -388,6 +393,7 @@ public class TesseractOCRConfig implements Serializable{
        
        /**
         * Set the path to the ImageMagick executable, needed if it is not on system path.
+        * @param path to ImageMagick file.
         */
        public void setImageMagickPath(String ImageMagickPath) {
                if(!ImageMagickPath.isEmpty() && !ImageMagickPath.endsWith(File.separator))

http://git-wip-us.apache.org/repos/asf/tika/blob/6773d42d/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index c2ef1ee..ae67425 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -137,7 +137,7 @@ public class TesseractOCRParser extends AbstractParser {
      
     }
     
-    public boolean hasImageMagick(TesseractOCRConfig config) {
+    private boolean hasImageMagick(TesseractOCRConfig config) {
         // Fetch where the config says to find ImageMagick Program
         String ImageMagick = config.getImageMagickPath() + getImageMagickProg();
 
@@ -155,7 +155,7 @@ public class TesseractOCRParser extends AbstractParser {
      
     }
     
-    public boolean hasPython() {
+    private boolean hasPython() {
        // check if python is installed and if the rotation program path has been specified correctly
         
        boolean hasPython = false;
@@ -261,8 +261,8 @@ public class TesseractOCRParser extends AbstractParser {
      * This method is used to process the image to an OCR-friendly format.
      * @param streamingObject input image to be processed
      * @param config TesseractOCRconfig class to get ImageMagick properties
-     * @throws IOException
-     * @throws TikaException
+     * @throws IOException if an input error occurred
+     * @throws TikaException if an exception timed out
      */
     private void processImage(File streamingObject, TesseractOCRConfig config) throws IOException, TikaException {
        
@@ -292,7 +292,10 @@ public class TesseractOCRParser extends AbstractParser {
         }
               
         // process the image - parameter values can be set in TesseractOCRConfig.properties
-       String line = "convert -density " + config.getDensity() + " -depth " + config.getDepth() + " -colorspace " + config.getColorspace() +  " -filter " + config.getFilter() + " -resize " + config.getResize() + "% -rotate "+ angle + " " + streamingObject.getAbsolutePath() + " " + streamingObject.getAbsolutePath();
+       String line = "convert -density " + config.getDensity() + " -depth " + config.getDepth() + 
+                       " -colorspace " + config.getColorspace() +  " -filter " + config.getFilter() + 
+                       " -resize " + config.getResize() + "% -rotate "+ angle + " " + streamingObject.getAbsolutePath() + 
+                       " " + streamingObject.getAbsolutePath();        
         cmdLine = CommandLine.parse(line);
                try {
                        executor.execute(cmdLine);

Reply via email to