fix for TIKA-2021 contributed by Zarana Parekh

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/de84d71b
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/de84d71b
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/de84d71b

Branch: refs/heads/master
Commit: de84d71b145045792b8a3bd175634251623188dc
Parents: 48b27d2
Author: Zarana Parekh <[email protected]>
Authored: Fri Jun 24 19:28:26 2016 -0700
Committer: Zarana Parekh <[email protected]>
Committed: Fri Jun 24 19:28:26 2016 -0700

----------------------------------------------------------------------
 tika-bundle/pom.xml                             | 628 +++++++++----------
 .../tika/parser/ocr/TesseractOCRParser.java     |  26 +-
 2 files changed, 327 insertions(+), 327 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/de84d71b/tika-bundle/pom.xml
----------------------------------------------------------------------
diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml
index 7fb5c8d..e94b43f 100644
--- a/tika-bundle/pom.xml
+++ b/tika-bundle/pom.xml
@@ -113,320 +113,320 @@
 
   <build>
        <pluginManagement>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.felix</groupId>
-        <artifactId>maven-bundle-plugin</artifactId>
-        <extensions>true</extensions>
-        <configuration>
-          <instructions>
-            <_runsystempackages>com.sun.xml.bind.marshaller, com.sun.xml.internal.bind.marshaller</_runsystempackages>
-            <Bundle-Activator>
-              org.apache.tika.parser.internal.Activator
-            </Bundle-Activator>
-            <Embed-Dependency>
-              tika-parsers;inline=true,
-              commons-compress, xz, commons-codec, commons-csv,
-              commons-io, commons-exec, junrar,
-              pdfbox,pdfbox-tools,pdfbox-debugger,fontbox,jempbox,bcmail-jdk15on,bcprov-jdk15on,bcpkix-jdk15on,
-              poi,poi-scratchpad,poi-ooxml,poi-ooxml-schemas,
-              curvesapi,
-              xmlbeans,
-              jackcess,
-              commons-lang,
-              tagsoup,
-              asm,
-              juniversalchardet,
-              vorbis-java-core, vorbis-java-tika,
-              isoparser,
-              metadata-extractor, xmpcore, json-simple,
-              boilerpipe, rome, rome-utils, opennlp-tools, opennlp-maxent,
-              geoapi, sis-metadata, sis-netcdf, sis-utility,
-              sis-storage, apache-mime4j-core, apache-mime4j-dom,
-              jsr-275, jhighlight, java-libpst, jwnl,
-              netcdf4, grib, cdm, httpservices, jcip-annotations,
-              jmatio, guava
-            </Embed-Dependency>
-            <Embed-Transitive>true</Embed-Transitive>
-            <Bundle-DocURL>${project.url}</Bundle-DocURL>
-            <Export-Package>
-              !org.apache.tika.parser,
-              !org.apache.tika.parser.external,
-              org.apache.tika.parser.*,
-            </Export-Package>
-            <Import-Package>
-              !org.junit,
-              !org.junit.*,
-              !junit.*,
-              !org.apache.ctakes.*,
-              !org.apache.uima.*,
-              *,
-              org.apache.tika.fork,
-              android.util;resolution:=optional,
-              com.adobe.xmp;resolution:=optional,
-              com.adobe.xmp.properties;resolution:=optional,
-              com.google.protobuf;resolution:=optional,
-              com.ibm.icu.text;resolution:=optional,
-              com.sleepycat.je;resolution:=optional,
-              com.sun.javadoc;resolution:=optional,
-              com.sun.xml.bind.marshaller;resolution:=optional,
-              com.sun.xml.internal.bind.marshaller;resolution:=optional,
-              com.sun.msv.datatype;resolution:=optional,
-              com.sun.msv.datatype.xsd;resolution:=optional,
-              com.sun.tools.javadoc;resolution:=optional,
-              edu.wisc.ssec.mcidas;resolution:=optional,
-              edu.wisc.ssec.mcidas.adde;resolution:=optional,
-              javax.activation;resolution:=optional,
-              javax.annotation;resolution:=optional,
-              javax.mail;resolution:=optional,
-              javax.mail.internet;resolution:=optional,
-              javax.servlet.annotation;resolution:=optional,
-              javax.servlet;resolution:=optional,
-              javax.servlet.http;resolution:=optional,
-              javax.measure.converter;resolution:=optional,
-              javax.ws.rs.core;resolution:=optional,
-              net.sf.ehcache;resolution:=optional,
-              nu.xom;resolution:=optional,
-              opendap.dap.http;resolution:=optional,
-              opendap.dap;resolution:=optional,
-              opendap.dap.parser;resolution:=optional,
-              opennlp.maxent;resolution:=optional,
-              opennlp.tools.namefind;resolution:=optional,
-              net.didion.jwnl;resolution:=optional,
-              org.apache.cxf.jaxrs.client;resolution:=optional,
-              org.apache.cxf.jaxrs.ext.multipart;resolution:=optional,
-              org.apache.commons.exec;resolution:=optional,
-              org.apache.commons.io;resolution:=optional,
-              org.apache.commons.httpclient;resolution:=optional,
-              org.apache.commons.httpclient.auth;resolution:=optional,
-              org.apache.commons.httpclient.methods;resolution:=optional,
-              org.apache.commons.httpclient.params;resolution:=optional,
-              org.apache.commons.httpclient.protocol;resolution:=optional,
-              org.apache.commons.httpclient.util;resolution:=optional,
-              org.apache.commons.vfs2;resolution:=optional,
-              org.apache.commons.vfs2.provider;resolution:=optional,
-              org.apache.commons.vfs2.util;resolution:=optional,
-              org.apache.crimson.jaxp;resolution:=optional,
-              org.apache.jcp.xml.dsig.internal.dom;resolution:=optional,
-              org.apache.sis;resolution:=optional,
-              org.apache.sis.distance;resolution:=optional,
-              org.apache.sis.geometry;resolution:=optional,
-              org.apache.tools.ant;resolution:=optional,
-              org.apache.tools.ant.taskdefs;resolution:=optional,
-              org.apache.tools.ant.types;resolution:=optional,
-              org.apache.xerces.parsers;resolution:=optional,
-              org.apache.xerces.util;resolution:=optional,
-              org.apache.xerces.xni;resolution:=optional,
-              org.apache.xerces.xni.parser;resolution:=optional,
-              org.apache.xml.resolver;resolution:=optional,
-              org.apache.xml.resolver.tools;resolution:=optional,
-              org.apache.xml.security;resolution:=optional,
-              org.apache.xml.security.c14n;resolution:=optional,
-              org.apache.xml.security.utils;resolution:=optional,
-              org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional,
-              org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional,
-              org.bouncycastle.cert;resolution:=optional,
-              org.bouncycastle.cert.jcajce;resolution:=optional,
-              org.bouncycastle.cert.ocsp;resolution:=optional,
-              org.bouncycastle.cms.bc;resolution:=optional,
-              org.bouncycastle.operator;resolution:=optional,
-              org.bouncycastle.operator.bc;resolution:=optional,
-              org.bouncycastle.tsp;resolution:=optional,
-              org.cyberneko.html.xercesbridge;resolution:=optional,
-              org.etsi.uri.x01903.v14;resolution:=optional,
-              org.ibex.nestedvm;resolution:=optional,
-              org.gjt.xpp;resolution:=optional,
-              org.jaxen;resolution:=optional,
-              org.jaxen.dom4j;resolution:=optional,
-              org.jaxen.pattern;resolution:=optional,
-              org.jaxen.saxpath;resolution:=optional,
-              org.jdom;resolution:=optional,
-              org.jdom.input;resolution:=optional,
-              org.jdom.output;resolution:=optional,
-              org.jdom2;resolution:=optional,
-              org.jdom2.input;resolution:=optional,
-              org.jdom2.input.sax;resolution:=optional,
-              org.jdom2.output;resolution:=optional,
-              org.jdom2.filter;resolution:=optional,
-              org.json.simple;resolution:=optional,
-              org.json;resolution:=optional,
-              org.openxmlformats.schemas.officeDocument.x2006.math;resolution:=optional,
-              org.openxmlformats.schemas.schemaLibrary.x2006.main;resolution:=optional,
-              org.osgi.framework;resolution:=optional,
-              org.quartz;resolution:=optional,
-              org.quartz.impl;resolution:=optional,
-              org.slf4j;resolution:=optional,
-              org.sqlite;resolution:=optional,
-              org.w3c.dom;resolution:=optional,
-              org.relaxng.datatype;resolution:=optional,
-              org.xml.sax;resolution:=optional,
-              org.xml.sax.ext;resolution:=optional,
-              org.xml.sax.helpers;resolution:=optional,
-              org.xmlpull.v1;resolution:=optional,
-              com.microsoft.schemas.office.powerpoint;resolution:=optional,
-              com.microsoft.schemas.office.word;resolution:=optional,              sun.misc;resolution:=optional,
-              ucar.units;resolution:=optional,
-              ucar.httpservices;resolution:=optional,
-              ucar.nc2.util;resolution:=optional,
-              ucar.nc2.util.cache;resolution:=optional,
-              ucar.nc2.dataset;resolution:=optional,
-              ucar.nc2;resolution:=optional,
-              ucar.nc2.constants;resolution:=optional,
-              ucar.nc2.dt;resolution:=optional,
-              ucar.nc2.dt.grid;resolution:=optional,
-              ucar.nc2.ft;resolution:=optional,
-              ucar.nc2.iosp;resolution:=optional,
-              ucar.nc2.iosp.hdf4;resolution:=optional,
-              ucar.nc2.ncml;resolution:=optional,
-              ucar.nc2.stream;resolution:=optional,
-              ucar.nc2.time;resolution:=optional,
-              ucar.nc2.units;resolution:=optional,
-              ucar.nc2.wmo;resolution:=optional,
-              ucar.nc2.write;resolution:=optional,
-              ucar.ma2;resolution:=optional,
-              ucar.grib;resolution:=optional,
-              ucar.grib.grib1;resolution:=optional,
-              ucar.grib.grib2;resolution:=optional,
-              ucar.grid;resolution:=optional,
-              ucar.unidata.geoloc;resolution:=optional,
-              ucar.unidata.geoloc.projection;resolution:=optional,
-              ucar.unidata.geoloc.projection.proj4;resolution:=optional,
-              ucar.unidata.geoloc.projection.sat;resolution:=optional,
-              ucar.unidata.io;resolution:=optional,
-              ucar.unidata.util;resolution:=optional,
-              com.jmatio.io;resolution:=optional,
-              com.google.gson;resolution:=optional,
-              visad;resolution:=optional,
-              visad.data;resolution:=optional,
-              visad.data.vis5d;resolution:=optional,
-              visad.jmet;resolution:=optional,
-              visad.util;resolution:=optional,
-              colorspace;resolution:=optional,
-              com.sun.jna;resolution:=optional,
-              com.sun.jna.ptr;resolution:=optional,
-              icc;resolution:=optional,
-              jj2000.j2k.codestream;resolution:=optional,
-              jj2000.j2k.codestream.reader;resolution:=optional,
-              jj2000.j2k.decoder;resolution:=optional,
-              jj2000.j2k.entropy.decoder;resolution:=optional,
-              jj2000.j2k.fileformat.reader;resolution:=optional,
-              jj2000.j2k.image;resolution:=optional,
-              jj2000.j2k.image.invcomptransf;resolution:=optional,
-              jj2000.j2k.image.output;resolution:=optional,
-              jj2000.j2k.io;resolution:=optional,
-              jj2000.j2k.quantization.dequantizer;resolution:=optional,
-              jj2000.j2k.roi;resolution:=optional,
-              jj2000.j2k.util;resolution:=optional,
-              jj2000.j2k.wavelet.synthesis;resolution:=optional,
-              org.itadaki.bzip2;resolution:=optional,
-              org.jsoup;resolution:=optional,
-              org.jsoup.nodes;resolution:=optional,
-              org.jsoup.select;resolution:=optional,
-              thredds.featurecollection;resolution:=optional,
-              thredds.filesystem;resolution:=optional,
-              thredds.inventory;resolution:=optional,
-              thredds.inventory.filter;resolution:=optional,
-              thredds.inventory.partition;resolution:=optional,
-              com.beust.jcommander;resolution:=optional,
-              com.google.common.base;resolution:=optional,
-              com.google.common.math;resolution:=optional,
-              org.apache.http;resolution:=optional,
-              org.apache.http.client.utils;resolution:=optional,
-              org.joda.time;resolution:=optional,
-              org.joda.time.chrono;resolution:=optional,
-              org.joda.time.field;resolution:=optional,
-              org.joda.time.format;resolution:=optional,
-              sun.reflect.generics.reflectiveObjects;resolution:=optional,
-              org.apache.http.auth;resolution:=optional,
-              org.apache.http.client;resolution:=optional,
-              org.apache.http.client.entity;resolution:=optional,
-              org.apache.http.client.methods;resolution:=optional,
-              org.apache.http.conn;resolution:=optional,
-              org.apache.http.conn.scheme;resolution:=optional,
-              org.apache.http.cookie;resolution:=optional,
-              org.apache.http.entity;resolution:=optional,
-              org.apache.http.impl.client;resolution:=optional,
-              org.apache.http.impl.conn;resolution:=optional,
-              org.apache.http.message;resolution:=optional,
-              org.apache.http.params;resolution:=optional,
-              org.apache.http.protocol;resolution:=optional,
-              org.apache.http.util;resolution:=optional
-            </Import-Package>
-          </instructions>
-        </configuration>
-      </plugin>
-      <!-- TIKA-763: Workaround to avoid including LGPL classes -->
-      <plugin>
-        <artifactId>maven-dependency-plugin</artifactId>
-        <executions>
-          <execution>
-            <phase>prepare-package</phase>
-            <goals>
-              <goal>unpack-dependencies</goal>
-            </goals>
-            <configuration>
-              <includeArtifactIds>netcdf</includeArtifactIds>
-              <excludes>
-                ucar/nc2/iosp/fysat/Fysat*.class,
-                ucar/nc2/dataset/transform/VOceanSG1*class,
-                ucar/unidata/geoloc/vertical/OceanSG*.class,
-                META-INF/**,CHANGES,README
-              </excludes>
-              <outputDirectory>
-                ${project.build.directory}/classes
-              </outputDirectory>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <!-- The Tika Bundle has no java code of its own, so no need to do -->
-      <!--  any forbidden API checking against it (it gets confused...) -->
-      <plugin>
-        <groupId>de.thetaphi</groupId>
-        <artifactId>forbiddenapis</artifactId>
-        <configuration>
-          <skip>true</skip>
-        </configuration>
-      </plugin>
-
-      <plugin>
-        <artifactId>maven-assembly-plugin</artifactId>
-        <executions>
-          <execution>
-            <phase>pre-integration-test</phase>
-            <goals>
-              <goal>single</goal>
-            </goals>
-            <configuration>
-              <descriptor>test-bundles.xml</descriptor>
-              <finalName>test</finalName>
-              <attach>false</attach>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-
-      <plugin>
-        <artifactId>maven-failsafe-plugin</artifactId>
-        <version>2.10</version>
-        <executions>
-          <execution>
-            <goals>
-              <goal>integration-test</goal>
-              <goal>verify</goal>
-            </goals>
-          </execution>
-        </executions>
-        <configuration>
-          <systemPropertyVariables>
-            <org.ops4j.pax.logging.DefaultServiceLog.level>
-              WARN
-            </org.ops4j.pax.logging.DefaultServiceLog.level>
-          </systemPropertyVariables>
-        </configuration>
-      </plugin>
-    </plugins>
+           <plugins>
+             <plugin>
+               <groupId>org.apache.felix</groupId>
+               <artifactId>maven-bundle-plugin</artifactId>
+               <extensions>true</extensions>
+               <configuration>
+                 <instructions>
+                   <_runsystempackages>com.sun.xml.bind.marshaller, com.sun.xml.internal.bind.marshaller</_runsystempackages>
+                   <Bundle-Activator>
+                     org.apache.tika.parser.internal.Activator
+                   </Bundle-Activator>
+                   <Embed-Dependency>
+                     tika-parsers;inline=true,
+                     commons-compress, xz, commons-codec, commons-csv,
+                     commons-io, commons-exec, junrar,
+                     pdfbox,pdfbox-tools,pdfbox-debugger,fontbox,jempbox,bcmail-jdk15on,bcprov-jdk15on,bcpkix-jdk15on,
+                     poi,poi-scratchpad,poi-ooxml,poi-ooxml-schemas,
+                     curvesapi,
+                     xmlbeans,
+                     jackcess,
+                     commons-lang,
+                     tagsoup,
+                     asm,
+                     juniversalchardet,
+                     vorbis-java-core, vorbis-java-tika,
+                     isoparser,
+                     metadata-extractor, xmpcore, json-simple,
+                     boilerpipe, rome, rome-utils, opennlp-tools, opennlp-maxent,
+                     geoapi, sis-metadata, sis-netcdf, sis-utility,
+                     sis-storage, apache-mime4j-core, apache-mime4j-dom,
+                     jsr-275, jhighlight, java-libpst, jwnl,
+                     netcdf4, grib, cdm, httpservices, jcip-annotations,
+                     jmatio, guava
+                   </Embed-Dependency>
+                   <Embed-Transitive>true</Embed-Transitive>
+                   <Bundle-DocURL>${project.url}</Bundle-DocURL>
+                   <Export-Package>
+                     !org.apache.tika.parser,
+                     !org.apache.tika.parser.external,
+                     org.apache.tika.parser.*,
+                   </Export-Package>
+                   <Import-Package>
+                     !org.junit,
+                     !org.junit.*,
+                     !junit.*,
+                     !org.apache.ctakes.*,
+                     !org.apache.uima.*,
+                     *,
+                     org.apache.tika.fork,
+                     android.util;resolution:=optional,
+                     com.adobe.xmp;resolution:=optional,
+                     com.adobe.xmp.properties;resolution:=optional,
+                     com.google.protobuf;resolution:=optional,
+                     com.ibm.icu.text;resolution:=optional,
+                     com.sleepycat.je;resolution:=optional,
+                     com.sun.javadoc;resolution:=optional,
+                     com.sun.xml.bind.marshaller;resolution:=optional,
+                     com.sun.xml.internal.bind.marshaller;resolution:=optional,
+                     com.sun.msv.datatype;resolution:=optional,
+                     com.sun.msv.datatype.xsd;resolution:=optional,
+                     com.sun.tools.javadoc;resolution:=optional,
+                     edu.wisc.ssec.mcidas;resolution:=optional,
+                     edu.wisc.ssec.mcidas.adde;resolution:=optional,
+                     javax.activation;resolution:=optional,
+                     javax.annotation;resolution:=optional,
+                     javax.mail;resolution:=optional,
+                     javax.mail.internet;resolution:=optional,
+                     javax.servlet.annotation;resolution:=optional,
+                     javax.servlet;resolution:=optional,
+                     javax.servlet.http;resolution:=optional,
+                     javax.measure.converter;resolution:=optional,
+                     javax.ws.rs.core;resolution:=optional,
+                     net.sf.ehcache;resolution:=optional,
+                     nu.xom;resolution:=optional,
+                     opendap.dap.http;resolution:=optional,
+                     opendap.dap;resolution:=optional,
+                     opendap.dap.parser;resolution:=optional,
+                     opennlp.maxent;resolution:=optional,
+                     opennlp.tools.namefind;resolution:=optional,
+                     net.didion.jwnl;resolution:=optional,
+                     org.apache.cxf.jaxrs.client;resolution:=optional,
+                     org.apache.cxf.jaxrs.ext.multipart;resolution:=optional,
+                     org.apache.commons.exec;resolution:=optional,
+                     org.apache.commons.io;resolution:=optional,
+                     org.apache.commons.httpclient;resolution:=optional,
+                     org.apache.commons.httpclient.auth;resolution:=optional,
+                     org.apache.commons.httpclient.methods;resolution:=optional,
+                     org.apache.commons.httpclient.params;resolution:=optional,
+                     org.apache.commons.httpclient.protocol;resolution:=optional,
+                     org.apache.commons.httpclient.util;resolution:=optional,
+                     org.apache.commons.vfs2;resolution:=optional,
+                     org.apache.commons.vfs2.provider;resolution:=optional,
+                     org.apache.commons.vfs2.util;resolution:=optional,
+                     org.apache.crimson.jaxp;resolution:=optional,
+                     org.apache.jcp.xml.dsig.internal.dom;resolution:=optional,
+                     org.apache.sis;resolution:=optional,
+                     org.apache.sis.distance;resolution:=optional,
+                     org.apache.sis.geometry;resolution:=optional,
+                     org.apache.tools.ant;resolution:=optional,
+                     org.apache.tools.ant.taskdefs;resolution:=optional,
+                     org.apache.tools.ant.types;resolution:=optional,
+                     org.apache.xerces.parsers;resolution:=optional,
+                     org.apache.xerces.util;resolution:=optional,
+                     org.apache.xerces.xni;resolution:=optional,
+                     org.apache.xerces.xni.parser;resolution:=optional,
+                     org.apache.xml.resolver;resolution:=optional,
+                     org.apache.xml.resolver.tools;resolution:=optional,
+                     org.apache.xml.security;resolution:=optional,
+                     org.apache.xml.security.c14n;resolution:=optional,
+                     org.apache.xml.security.utils;resolution:=optional,
+                     org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional,
+                     org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional,
+                     org.bouncycastle.cert;resolution:=optional,
+                     org.bouncycastle.cert.jcajce;resolution:=optional,
+                     org.bouncycastle.cert.ocsp;resolution:=optional,
+                     org.bouncycastle.cms.bc;resolution:=optional,
+                     org.bouncycastle.operator;resolution:=optional,
+                     org.bouncycastle.operator.bc;resolution:=optional,
+                     org.bouncycastle.tsp;resolution:=optional,
+                     org.cyberneko.html.xercesbridge;resolution:=optional,
+                     org.etsi.uri.x01903.v14;resolution:=optional,
+                     org.ibex.nestedvm;resolution:=optional,
+                     org.gjt.xpp;resolution:=optional,
+                     org.jaxen;resolution:=optional,
+                     org.jaxen.dom4j;resolution:=optional,
+                     org.jaxen.pattern;resolution:=optional,
+                     org.jaxen.saxpath;resolution:=optional,
+                     org.jdom;resolution:=optional,
+                     org.jdom.input;resolution:=optional,
+                     org.jdom.output;resolution:=optional,
+                     org.jdom2;resolution:=optional,
+                     org.jdom2.input;resolution:=optional,
+                     org.jdom2.input.sax;resolution:=optional,
+                     org.jdom2.output;resolution:=optional,
+                     org.jdom2.filter;resolution:=optional,
+                     org.json.simple;resolution:=optional,
+                     org.json;resolution:=optional,
+                     org.openxmlformats.schemas.officeDocument.x2006.math;resolution:=optional,
+                     org.openxmlformats.schemas.schemaLibrary.x2006.main;resolution:=optional,
+                     org.osgi.framework;resolution:=optional,
+                     org.quartz;resolution:=optional,
+                     org.quartz.impl;resolution:=optional,
+                     org.slf4j;resolution:=optional,
+                     org.sqlite;resolution:=optional,
+                     org.w3c.dom;resolution:=optional,
+                     org.relaxng.datatype;resolution:=optional,
+                     org.xml.sax;resolution:=optional,
+                     org.xml.sax.ext;resolution:=optional,
+                     org.xml.sax.helpers;resolution:=optional,
+                     org.xmlpull.v1;resolution:=optional,
+                     com.microsoft.schemas.office.powerpoint;resolution:=optional,
+                     com.microsoft.schemas.office.word;resolution:=optional,              sun.misc;resolution:=optional,
+                     ucar.units;resolution:=optional,
+                     ucar.httpservices;resolution:=optional,
+                     ucar.nc2.util;resolution:=optional,
+                     ucar.nc2.util.cache;resolution:=optional,
+                     ucar.nc2.dataset;resolution:=optional,
+                     ucar.nc2;resolution:=optional,
+                     ucar.nc2.constants;resolution:=optional,
+                     ucar.nc2.dt;resolution:=optional,
+                     ucar.nc2.dt.grid;resolution:=optional,
+                     ucar.nc2.ft;resolution:=optional,
+                     ucar.nc2.iosp;resolution:=optional,
+                     ucar.nc2.iosp.hdf4;resolution:=optional,
+                     ucar.nc2.ncml;resolution:=optional,
+                     ucar.nc2.stream;resolution:=optional,
+                     ucar.nc2.time;resolution:=optional,
+                     ucar.nc2.units;resolution:=optional,
+                     ucar.nc2.wmo;resolution:=optional,
+                     ucar.nc2.write;resolution:=optional,
+                     ucar.ma2;resolution:=optional,
+                     ucar.grib;resolution:=optional,
+                     ucar.grib.grib1;resolution:=optional,
+                     ucar.grib.grib2;resolution:=optional,
+                     ucar.grid;resolution:=optional,
+                     ucar.unidata.geoloc;resolution:=optional,
+                     ucar.unidata.geoloc.projection;resolution:=optional,
+                     ucar.unidata.geoloc.projection.proj4;resolution:=optional,
+                     ucar.unidata.geoloc.projection.sat;resolution:=optional,
+                     ucar.unidata.io;resolution:=optional,
+                     ucar.unidata.util;resolution:=optional,
+                     com.jmatio.io;resolution:=optional,
+                     com.google.gson;resolution:=optional,
+                     visad;resolution:=optional,
+                     visad.data;resolution:=optional,
+                     visad.data.vis5d;resolution:=optional,
+                     visad.jmet;resolution:=optional,
+                     visad.util;resolution:=optional,
+                     colorspace;resolution:=optional,
+                     com.sun.jna;resolution:=optional,
+                     com.sun.jna.ptr;resolution:=optional,
+                     icc;resolution:=optional,
+                     jj2000.j2k.codestream;resolution:=optional,
+                     jj2000.j2k.codestream.reader;resolution:=optional,
+                     jj2000.j2k.decoder;resolution:=optional,
+                     jj2000.j2k.entropy.decoder;resolution:=optional,
+                     jj2000.j2k.fileformat.reader;resolution:=optional,
+                     jj2000.j2k.image;resolution:=optional,
+                     jj2000.j2k.image.invcomptransf;resolution:=optional,
+                     jj2000.j2k.image.output;resolution:=optional,
+                     jj2000.j2k.io;resolution:=optional,
+                     jj2000.j2k.quantization.dequantizer;resolution:=optional,
+                     jj2000.j2k.roi;resolution:=optional,
+                     jj2000.j2k.util;resolution:=optional,
+                     jj2000.j2k.wavelet.synthesis;resolution:=optional,
+                     org.itadaki.bzip2;resolution:=optional,
+                     org.jsoup;resolution:=optional,
+                     org.jsoup.nodes;resolution:=optional,
+                     org.jsoup.select;resolution:=optional,
+                     thredds.featurecollection;resolution:=optional,
+                     thredds.filesystem;resolution:=optional,
+                     thredds.inventory;resolution:=optional,
+                     thredds.inventory.filter;resolution:=optional,
+                     thredds.inventory.partition;resolution:=optional,
+                     com.beust.jcommander;resolution:=optional,
+                     com.google.common.base;resolution:=optional,
+                     com.google.common.math;resolution:=optional,
+                     org.apache.http;resolution:=optional,
+                     org.apache.http.client.utils;resolution:=optional,
+                     org.joda.time;resolution:=optional,
+                     org.joda.time.chrono;resolution:=optional,
+                     org.joda.time.field;resolution:=optional,
+                     org.joda.time.format;resolution:=optional,
+                     sun.reflect.generics.reflectiveObjects;resolution:=optional,
+                     org.apache.http.auth;resolution:=optional,
+                     org.apache.http.client;resolution:=optional,
+                     org.apache.http.client.entity;resolution:=optional,
+                     org.apache.http.client.methods;resolution:=optional,
+                     org.apache.http.conn;resolution:=optional,
+                     org.apache.http.conn.scheme;resolution:=optional,
+                     org.apache.http.cookie;resolution:=optional,
+                     org.apache.http.entity;resolution:=optional,
+                     org.apache.http.impl.client;resolution:=optional,
+                     org.apache.http.impl.conn;resolution:=optional,
+                     org.apache.http.message;resolution:=optional,
+                     org.apache.http.params;resolution:=optional,
+                     org.apache.http.protocol;resolution:=optional,
+                     org.apache.http.util;resolution:=optional
+                   </Import-Package>
+                 </instructions>
+               </configuration>
+             </plugin>
+             <!-- TIKA-763: Workaround to avoid including LGPL classes -->
+             <plugin>
+               <artifactId>maven-dependency-plugin</artifactId>
+               <executions>
+                 <execution>
+                   <phase>prepare-package</phase>
+                   <goals>
+                     <goal>unpack-dependencies</goal>
+                   </goals>
+                   <configuration>
+                     <includeArtifactIds>netcdf</includeArtifactIds>
+                     <excludes>
+                       ucar/nc2/iosp/fysat/Fysat*.class,
+                       ucar/nc2/dataset/transform/VOceanSG1*class,
+                       ucar/unidata/geoloc/vertical/OceanSG*.class,
+                       META-INF/**,CHANGES,README
+                     </excludes>
+                     <outputDirectory>
+                       ${project.build.directory}/classes
+                     </outputDirectory>
+                   </configuration>
+                 </execution>
+               </executions>
+             </plugin>
+       
+             <!-- The Tika Bundle has no java code of its own, so no need to do -->
+             <!--  any forbidden API checking against it (it gets confused...) -->
+             <plugin>
+               <groupId>de.thetaphi</groupId>
+               <artifactId>forbiddenapis</artifactId>
+               <configuration>
+                 <skip>true</skip>
+               </configuration>
+             </plugin>
+       
+             <plugin>
+               <artifactId>maven-assembly-plugin</artifactId>
+               <executions>
+                 <execution>
+                   <phase>pre-integration-test</phase>
+                   <goals>
+                     <goal>single</goal>
+                   </goals>
+                   <configuration>
+                     <descriptor>test-bundles.xml</descriptor>
+                     <finalName>test</finalName>
+                     <attach>false</attach>
+                   </configuration>
+                 </execution>
+               </executions>
+             </plugin>
+       
+             <plugin>
+               <artifactId>maven-failsafe-plugin</artifactId>
+               <version>2.10</version>
+               <executions>
+                 <execution>
+                   <goals>
+                     <goal>integration-test</goal>
+                     <goal>verify</goal>
+                   </goals>
+                 </execution>
+               </executions>
+               <configuration>
+                 <systemPropertyVariables>
+                   <org.ops4j.pax.logging.DefaultServiceLog.level>
+                     WARN
+                   </org.ops4j.pax.logging.DefaultServiceLog.level>
+                 </systemPropertyVariables>
+               </configuration>
+             </plugin>
+           </plugins>
     </pluginManagement>
   </build>
 

http://git-wip-us.apache.org/repos/asf/tika/blob/de84d71b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 1280aec..dae4a64 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -160,7 +160,7 @@ public class TesseractOCRParser extends AbstractParser {
         
        boolean hasPython = false;
        
-       try {
+               try {
                        Process proc = Runtime.getRuntime().exec("python -h");
                        BufferedReader stdInput = new BufferedReader(new 
InputStreamReader(proc.getInputStream()));
                        if(stdInput.read() != -1) {
@@ -169,7 +169,7 @@ public class TesseractOCRParser extends AbstractParser {
                } catch (IOException e) {
                        e.printStackTrace();
                } 
-       
+
                return hasPython;       
     }
     
@@ -283,22 +283,22 @@ public class TesseractOCRParser extends AbstractParser {
         // determine the angle of rotation required to make the text horizontal
         CommandLine cmdLine = CommandLine.parse(cmd);
         if(hasPython()) {
-               try {
-                       executor.execute(cmdLine);
-                       angle = outputStream.toString().trim();
-            } catch(Exception e) {     
-                       e.printStackTrace();
-               }
+                       try {
+                               executor.execute(cmdLine);
+                               angle = outputStream.toString().trim();
+                       } catch(Exception e) {  
+                               e.printStackTrace();
+                       }
         }
               
         // process the image - parameter values can be set in 
TesseractOCRConfig.properties
        String line = "convert -density " + config.getDensity() + " -depth " + 
config.getDepth() + " -colorspace " + config.getColorspace() +  " -filter " + 
config.getFilter() + " -resize " + config.getResize() + "% -rotate "+ angle + " 
" + streamingObject.getAbsolutePath() + " " + 
streamingObject.getAbsolutePath();           
         cmdLine = CommandLine.parse(line);
-        try {
-               executor.execute(cmdLine);
-        } catch(Exception e) { 
-               e.printStackTrace();
-       } 
+               try {
+                       executor.execute(cmdLine);
+               } catch(Exception e) {  
+                       e.printStackTrace();
+               } 
        
         tmp.close();
     }

Reply via email to