Author: nick
Date: Sat Feb 28 13:01:36 2015
New Revision: 1662927

URL: http://svn.apache.org/r1662927
Log:
Start on unit testing for the new TIKA-1558 style parser blacklisting

Added:
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
    tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/
    
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklist.xml

Added: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java?rev=1662927&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
 (added)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
 Sat Feb 28 13:01:36 2015
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.config;
+
+import static org.apache.tika.TikaTest.assertContains;
+import static org.apache.tika.TikaTest.assertNotContained;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.net.URL;
+
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.CompositeParser;
+import org.apache.tika.parser.DefaultParser;
+import org.apache.tika.parser.EmptyParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.parser.executable.ExecutableParser;
+import org.junit.After;
+import org.junit.Test;
+
+/**
+ * Junit test class for {@link TikaConfig}, which cover things
+ *  that {@link TikaConfigTest} can't do due to a need for the
+ *  full set of parsers
+ */
+public class TikaParserConfigTest {
+    protected static ParseContext context = new ParseContext();
+    protected static TikaConfig getConfig(String config) throws Exception {
+        URL url = TikaConfig.class.getResource(config);
+        System.setProperty("tika.config", url.toExternalForm());
+        return new TikaConfig();
+    }
+    @After
+    public void resetConfig() {
+        System.clearProperty("tika.config");
+    }
+    
+    @Test
+    public void testMimeExcludeInclude() throws Exception {
+        TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
+        Parser parser = config.getParser();
+        
+        MediaType PDF = MediaType.application("pdf");
+        MediaType JPEG = MediaType.image("jpeg");
+        
+        
+        // Has two parsers
+        assertEquals(CompositeParser.class, parser.getClass());
+        CompositeParser cParser = (CompositeParser)parser;
+        assertEquals(2, cParser.getAllComponentParsers().size());
+        
+        // Both are decorated
+        assertTrue(cParser.getAllComponentParsers().get(0) instanceof 
ParserDecorator);
+        assertTrue(cParser.getAllComponentParsers().get(1) instanceof 
ParserDecorator);
+        ParserDecorator p0 = 
(ParserDecorator)cParser.getAllComponentParsers().get(0);
+        ParserDecorator p1 = 
(ParserDecorator)cParser.getAllComponentParsers().get(1);
+        
+        
+        // DefaultParser will be wrapped with excludes
+        assertEquals(DefaultParser.class, p0.getWrappedParser().getClass());
+        
+        assertNotContained(PDF, p0.getSupportedTypes(context));
+        assertContains(PDF, p0.getWrappedParser().getSupportedTypes(context));
+        assertNotContained(JPEG, p0.getSupportedTypes(context));
+        assertContains(JPEG, p0.getWrappedParser().getSupportedTypes(context));
+        
+        
+        // Will have an empty parser for PDF
+        assertEquals(EmptyParser.class, p1.getWrappedParser().getClass());
+        assertEquals(1, p1.getSupportedTypes(context).size());
+        assertContains(PDF, p1.getSupportedTypes(context));
+        assertNotContained(PDF, 
p1.getWrappedParser().getSupportedTypes(context));
+    }
+    
+    @Test
+    public void testParserExcludeFromDefault() throws Exception {
+        TikaConfig config = getConfig("TIKA-1558-blacklist.xml");
+        CompositeParser parser = (CompositeParser)config.getParser();
+        
+        MediaType PE_EXE = MediaType.application("x-msdownload");
+        MediaType ELF = MediaType.application("x-elf");
+        
+        
+        // Get the DefaultParser from the config
+        ParserDecorator confWrappedParser = 
(ParserDecorator)parser.getParsers().get(MediaType.APPLICATION_XML);
+        assertNotNull(confWrappedParser);
+        DefaultParser confParser = 
(DefaultParser)confWrappedParser.getWrappedParser();
+        
+        // Get a fresh "default" DefaultParser
+        DefaultParser normParser = new 
DefaultParser(config.getMediaTypeRegistry());
+        
+        
+        // The default one will offer the Executable Parser
+        assertContains(PE_EXE, normParser.getSupportedTypes(context));
+        assertContains(ELF, normParser.getSupportedTypes(context));
+        
+        boolean hasExec = false;
+        for (Parser p : normParser.getParsers().values()) {
+            if (p instanceof ExecutableParser) {
+                hasExec = true;
+                break;
+            }
+        }
+        assertTrue(hasExec);
+        
+        
+        // The one from the config won't
+        // TODO - Finish this
+/*
+        assertNotContained(PE_EXE, confParser.getSupportedTypes(context));
+        assertNotContained(ELF, confParser.getSupportedTypes(context));
+        
+        for (Parser p : confParser.getParsers().values()) {
+            if (p instanceof ExecutableParser)
+                fail("Shouldn't have the Executable Parser from config");
+        }
+*/
+    }
+}

Added: 
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklist.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklist.xml?rev=1662927&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklist.xml
 (added)
+++ 
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklist.xml
 Sat Feb 28 13:01:36 2015
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <parsers>
+    <parser class="org.apache.tika.parser.DefaultParser">
+      <mime-exclude>image/jpeg</mime-exclude>
+      <mime-exclude>application/pdf</mime-exclude>
+      <parser-exclude 
class="org.apache.tika.parser.executable.ExecutableParser"/>
+    </parser>
+    <parser class="org.apache.tika.parser.EmptyParser">
+      <mime>application/pdf</mime>
+    </parser>
+  </parsers>
+</properties>


Reply via email to