This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 33c21a3a4 TIKA-3812 add example of how to configure gdal programmatically
33c21a3a4 is described below

commit 33c21a3a4c9b4805908600083786eb11c127fd94
Author: tallison <[email protected]>
AuthorDate: Wed Oct 5 06:52:27 2022 -0400

    TIKA-3812 add example of how to configure gdal programmatically
---
 .../java/org/apache/tika/parser/ocr/TestOCR.java   | 31 ++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/java/org/apache/tika/parser/ocr/TestOCR.java b/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/java/org/apache/tika/parser/ocr/TestOCR.java
index f11ede9bf..5655bd72d 100644
--- a/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/java/org/apache/tika/parser/ocr/TestOCR.java
+++ b/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/java/org/apache/tika/parser/ocr/TestOCR.java
@@ -21,8 +21,11 @@ import static org.junit.jupiter.api.Assumptions.assumeTrue;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
@@ -36,6 +39,7 @@ import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.CompositeParser;
+import org.apache.tika.parser.DefaultParser;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
 import org.apache.tika.parser.gdal.GDALParser;
@@ -60,6 +64,33 @@ public class TestOCR extends TikaTest {
         assertContains("file contains", 
metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT));
     }
 
+    @Test
+    public void testPNGProgrammatically() throws Exception {
+        //remove the GDAL parser from the default parser
+        Parser defaultParser = new DefaultParser();
+        List<Parser> parsers = new ArrayList<>();
+        for (Parser p : 
((CompositeParser)defaultParser).getAllComponentParsers()) {
+            if (! (p instanceof GDALParser)) {
+                parsers.add(p);
+            }
+        }
+
+        //decorate the gdal parser to exclude these image formats
+        Set<MediaType> exclude = new HashSet<>();
+        exclude.add(MediaType.image("png"));
+        exclude.add(MediaType.image("jpeg"));
+        exclude.add(MediaType.image("bmp"));
+        exclude.add(MediaType.image("gif"));
+
+        Parser specialGDAL = ParserDecorator.withoutTypes(new GDALParser(), 
exclude);
+        parsers.add(specialGDAL);
+
+        Parser autoDetect = new AutoDetectParser(parsers.toArray(new 
Parser[0]));
+        List<Metadata> metadataList = getRecursiveMetadata("testOCR.png", 
autoDetect);
+        assertContains("file contains", 
metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT));
+
+    }
+
     @Test
     public void testOthers() throws Exception {
         Parser p = loadParser();

Reply via email to