updated Javadoc for Tesseract config and parser
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/6773d42d Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/6773d42d Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/6773d42d Branch: refs/heads/master Commit: 6773d42de77230dff621a1010ed37f0505dfa302 Parents: bc6667c Author: Zarana Parekh <[email protected]> Authored: Thu Jun 30 15:58:12 2016 -0700 Committer: Zarana Parekh <[email protected]> Committed: Thu Jun 30 15:58:12 2016 -0700 ---------------------------------------------------------------------- .../tika/parser/ocr/TesseractOCRConfig.java | 32 ++++++++++++-------- .../tika/parser/ocr/TesseractOCRParser.java | 13 +++++--- 2 files changed, 27 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/6773d42d/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java index 558a83d..101003f 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRConfig.java @@ -256,12 +256,14 @@ public class TesseractOCRConfig implements Serializable{ this.timeout = timeout; } - /** @see #setTimeout(int timeout)*/ + /** @see #setTimeout(int timeout) + * @return timeout value for Tesseract */ public int getTimeout() { return timeout; } - /** @see #setEnableImageProcessing(boolean) */ + /** @see #setEnableImageProcessing(boolean) + * @return image processing is enabled or not */ public int isEnableImageProcessing() { return enableImageProcessing; } @@ -282,12 +284,12 @@ public class TesseractOCRConfig implements Serializable{ } /** - * @param density the density to set + * @param density the density to set. Valid range of values is 150-1200. * Default value is 300. */ public void setDensity(int density) { if(density < 150 || density > 1200) { - throw new IllegalArgumentException("Invalid density value"); + throw new IllegalArgumentException("Invalid density value. Valid range of values is 150-1200."); } this.density = density; } @@ -300,7 +302,7 @@ public class TesseractOCRConfig implements Serializable{ } /** - * @param depth the depth to set + * @param depth the depth to set. Valid values are 2, 4, 8, 16, 32, 64, 256, 4096. * Default value is 4. */ public void setDepth(int depth) { @@ -311,7 +313,7 @@ public class TesseractOCRConfig implements Serializable{ return; } } - throw new IllegalArgumentException("Invalid depth value"); + throw new IllegalArgumentException("Invalid depth value. Valid values are 2, 4, 8, 16, 32, 64, 256, 4096."); } /** @@ -329,7 +331,7 @@ public class TesseractOCRConfig implements Serializable{ if(!colorspace.equals(null)) { this.colorspace = colorspace; } else { - throw new IllegalArgumentException("Invalid colorspace value"); + throw new IllegalArgumentException("Colorspace value cannot be null."); } } @@ -341,12 +343,13 @@ public class TesseractOCRConfig implements Serializable{ } /** - * @param filter the filter to set + * @param filter the filter to set. Valid values are point, hermite, cubic, box, gaussian, catrom, triangle, quadratic and mitchell. * Default value is triangle. */ public void setFilter(String filter) { if(filter.equals(null)) { - throw new IllegalArgumentException("Invalid filter value"); + throw new IllegalArgumentException("Filter value cannot be null. Valid values are point, hermite, " + + "cubic, box, gaussian, catrom, triangle, quadratic and mitchell."); } String[] allowedFilters = {"Point", "Hermite", "Cubic", "Box", "Gaussian", "Catrom", "Triangle", "Quadratic", "Mitchell"}; @@ -356,7 +359,8 @@ public class TesseractOCRConfig implements Serializable{ return; } } - throw new IllegalArgumentException("Invalid filter value"); + throw new IllegalArgumentException("Invalid filter value. Valid values are point, hermite, " + + "cubic, box, gaussian, catrom, triangle, quadratic and mitchell."); } /** @@ -367,7 +371,7 @@ public class TesseractOCRConfig implements Serializable{ } /** - * @param resize the resize to set + * @param resize the resize to set. Valid range of values is 100-900. * Default value is 900. */ public void setResize(int resize) { @@ -377,10 +381,11 @@ public class TesseractOCRConfig implements Serializable{ return; } } - throw new IllegalArgumentException("Invalid resize value"); + throw new IllegalArgumentException("Invalid resize value. Valid range of values is 100-900."); } - /** @see #setImageMagickPath(String ImageMagickPath)*/ + /** @see #setImageMagickPath(String ImageMagickPath) + * @return path to ImageMagick file. */ public String getImageMagickPath() { return ImageMagickPath; @@ -388,6 +393,7 @@ public class TesseractOCRConfig implements Serializable{ /** * Set the path to the ImageMagick executable, needed if it is not on system path. + * @param path to ImageMagick file. */ public void setImageMagickPath(String ImageMagickPath) { if(!ImageMagickPath.isEmpty() && !ImageMagickPath.endsWith(File.separator)) http://git-wip-us.apache.org/repos/asf/tika/blob/6773d42d/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java index c2ef1ee..ae67425 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java @@ -137,7 +137,7 @@ public class TesseractOCRParser extends AbstractParser { } - public boolean hasImageMagick(TesseractOCRConfig config) { + private boolean hasImageMagick(TesseractOCRConfig config) { // Fetch where the config says to find ImageMagick Program String ImageMagick = config.getImageMagickPath() + getImageMagickProg(); @@ -155,7 +155,7 @@ public class TesseractOCRParser extends AbstractParser { } - public boolean hasPython() { + private boolean hasPython() { // check if python is installed and if the rotation program path has been specified correctly boolean hasPython = false; @@ -261,8 +261,8 @@ public class TesseractOCRParser extends AbstractParser { * This method is used to process the image to an OCR-friendly format. * @param streamingObject input image to be processed * @param config TesseractOCRconfig class to get ImageMagick properties - * @throws IOException - * @throws TikaException + * @throws IOException if an input error occurred + * @throws TikaException if an exception timed out */ private void processImage(File streamingObject, TesseractOCRConfig config) throws IOException, TikaException { @@ -292,7 +292,10 @@ public class TesseractOCRParser extends AbstractParser { } // process the image - parameter values can be set in TesseractOCRConfig.properties - String line = "convert -density " + config.getDensity() + " -depth " + config.getDepth() + " -colorspace " + config.getColorspace() + " -filter " + config.getFilter() + " -resize " + config.getResize() + "% -rotate "+ angle + " " + streamingObject.getAbsolutePath() + " " + streamingObject.getAbsolutePath(); + String line = "convert -density " + config.getDensity() + " -depth " + config.getDepth() + + " -colorspace " + config.getColorspace() + " -filter " + config.getFilter() + + " -resize " + config.getResize() + "% -rotate "+ angle + " " + streamingObject.getAbsolutePath() + + " " + streamingObject.getAbsolutePath(); cmdLine = CommandLine.parse(line); try { executor.execute(cmdLine);
