sr...

tallison Wed, 13 May 2015 06:50:27 -0700

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
 Wed May 13 13:49:36 2015
@@ -1,38 +1,38 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.example;
-
-import java.io.ByteArrayInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.WriteOutContentHandler;
-import org.springframework.context.ApplicationContext;
-import org.springframework.context.support.ClassPathXmlApplicationContext;
-
-import com.google.common.base.Charsets;
-
-public class SpringExample {
-
-       public static void main(String[] args) throws Exception {
-               ApplicationContext context = new ClassPathXmlApplicationContext(
-                               new String[] { 
"org/apache/tika/example/spring.xml" });
-               Parser parser = context.getBean("tika", Parser.class);
-               parser.parse(new ByteArrayInputStream("Hello, 
World!".getBytes(Charsets.UTF_8)),
-                               new WriteOutContentHandler(System.out), new 
Metadata(),
-                               new ParseContext());
-       }
-
-}
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.ByteArrayInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.WriteOutContentHandler;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.support.ClassPathXmlApplicationContext;
+
+import com.google.common.base.Charsets;
+
+public class SpringExample {
+
+       public static void main(String[] args) throws Exception {
+               ApplicationContext context = new ClassPathXmlApplicationContext(
+                               new String[] { 
"org/apache/tika/example/spring.xml" });
+               Parser parser = context.getBean("tika", Parser.class);
+               parser.parse(new ByteArrayInputStream("Hello, 
World!".getBytes(Charsets.UTF_8)),
+                               new WriteOutContentHandler(System.out), new 
Metadata(),
+                               new ParseContext());
+       }
+
+}


Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
 Wed May 13 13:49:36 2015
@@ -1,218 +1,218 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.example;
-
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.io.Reader;
-import java.net.URL;
-import java.nio.CharBuffer;
-import java.util.HashMap;
-import java.util.Locale;
-import java.util.Map;
-import java.util.zip.GZIPInputStream;
-
-import org.apache.tika.Tika;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.CompositeParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParserDecorator;
-import org.apache.tika.parser.html.HtmlMapper;
-import org.apache.tika.parser.html.HtmlParser;
-import org.apache.tika.parser.html.IdentityHtmlMapper;
-import org.apache.tika.parser.txt.TXTParser;
-import org.apache.tika.parser.xml.XMLParser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.apache.tika.sax.LinkContentHandler;
-import org.apache.tika.sax.TeeContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-public class TIAParsingExample {
-
-       public static String parseToStringExample() throws Exception {
-               File document = new File("example.doc");
-               String content = new Tika().parseToString(document);
-               System.out.print(content);
-               return content;
-       }
-
-       public static void parseToReaderExample() throws Exception {
-               File document = new File("example.doc");
-               Reader reader = new Tika().parse(document);
-               try {
-                       char[] buffer = new char[1000];
-                       int n = reader.read(buffer);
-                       while (n != -1) {
-                               System.out.append(CharBuffer.wrap(buffer, 0, 
n));
-                               n = reader.read(buffer);
-                       }
-               } finally {
-                       reader.close();
-               }
-       }
-
-       public static void parseFileInputStream(String filename) throws 
Exception {
-               Parser parser = new AutoDetectParser();
-               ContentHandler handler = new DefaultHandler();
-               Metadata metadata = new Metadata();
-               ParseContext context = new ParseContext();
-               InputStream stream = new FileInputStream(new File(filename));
-               try {
-                       parser.parse(stream, handler, metadata, context);
-               } finally {
-                       stream.close();
-               }
-       }
-
-       public static void parseURLStream(String address) throws Exception {
-               Parser parser = new AutoDetectParser();
-               ContentHandler handler = new DefaultHandler();
-               Metadata metadata = new Metadata();
-               ParseContext context = new ParseContext();
-               InputStream stream = new GZIPInputStream(new 
URL(address).openStream());
-               try {
-                       parser.parse(stream, handler, metadata, context);
-               } finally {
-                       stream.close();
-               }
-       }
-
-       public static void parseTikaInputStream(String filename) throws 
Exception {
-               Parser parser = new AutoDetectParser();
-               ContentHandler handler = new DefaultHandler();
-               Metadata metadata = new Metadata();
-               ParseContext context = new ParseContext();
-               InputStream stream = TikaInputStream.get(new File(filename));
-               try {
-                       parser.parse(stream, handler, metadata, context);
-               } finally {
-                       stream.close();
-               }
-       }
-
-       public static File tikaInputStreamGetFile(String filename) throws 
Exception {
-               InputStream stream = TikaInputStream.get(new File(filename));
-               try {
-                       TikaInputStream tikaInputStream = 
TikaInputStream.get(stream);
-                       File file = tikaInputStream.getFile();
-                       return file;
-               } finally {
-                       stream.close();
-               }
-       }
-
-       public static void useHtmlParser() throws Exception {
-               InputStream stream = new ByteArrayInputStream(new byte[0]);
-               ContentHandler handler = new DefaultHandler();
-               Metadata metadata = new Metadata();
-               ParseContext context = new ParseContext();
-               Parser parser = new HtmlParser();
-               parser.parse(stream, handler, metadata, context);
-       }
-
-       public static void useCompositeParser() throws Exception {
-               InputStream stream = new ByteArrayInputStream(new byte[0]);
-               ContentHandler handler = new DefaultHandler();
-               ParseContext context = new ParseContext();
-               Map<MediaType, Parser> parsersByType = new HashMap<MediaType, 
Parser>();
-               parsersByType.put(MediaType.parse("text/html"), new 
HtmlParser());
-               parsersByType.put(MediaType.parse("application/xml"), new 
XMLParser());
-
-               CompositeParser parser = new CompositeParser();
-               parser.setParsers(parsersByType);
-               parser.setFallback(new TXTParser());
-
-               Metadata metadata = new Metadata();
-               metadata.set(Metadata.CONTENT_TYPE, "text/html");
-               parser.parse(stream, handler, metadata, context);
-       }
-
-       public static void useAutoDetectParser() throws Exception {
-               InputStream stream = new ByteArrayInputStream(new byte[0]);
-               ContentHandler handler = new DefaultHandler();
-               Metadata metadata = new Metadata();
-               ParseContext context = new ParseContext();
-               Parser parser = new AutoDetectParser();
-               parser.parse(stream, handler, metadata, context);
-       }
-
-       public static void testTeeContentHandler(String filename) throws 
Exception {
-               InputStream stream = new ByteArrayInputStream(new byte[0]);
-               Metadata metadata = new Metadata();
-               ParseContext context = new ParseContext();
-               Parser parser = new AutoDetectParser();
-               LinkContentHandler linkCollector = new LinkContentHandler();
-               OutputStream output = new FileOutputStream(new File(filename));
-               try {
-                       ContentHandler handler = new TeeContentHandler(
-                                       new BodyContentHandler(output), 
linkCollector);
-                       parser.parse(stream, handler, metadata, context);
-               } finally {
-                       output.close();
-               }
-       }
-
-       public static void testLocale() throws Exception {
-               InputStream stream = new ByteArrayInputStream(new byte[0]);
-               ContentHandler handler = new DefaultHandler();
-               Metadata metadata = new Metadata();
-               Parser parser = new AutoDetectParser();
-               ParseContext context = new ParseContext();
-               context.set(Locale.class, Locale.ENGLISH);
-               parser.parse(stream, handler, metadata, context);
-       }
-
-       public static void testHtmlMapper() throws Exception {
-               InputStream stream = new ByteArrayInputStream(new byte[0]);
-               ContentHandler handler = new DefaultHandler();
-               Metadata metadata = new Metadata();
-               Parser parser = new AutoDetectParser();
-               ParseContext context = new ParseContext();
-               context.set(HtmlMapper.class, new IdentityHtmlMapper());
-               parser.parse(stream, handler, metadata, context);
-       }
-
-       public static void testCompositeDocument() throws Exception {
-               InputStream stream = new ByteArrayInputStream(new byte[0]);
-               ContentHandler handler = new DefaultHandler();
-               Metadata metadata = new Metadata();
-               Parser parser = new AutoDetectParser();
-               ParseContext context = new ParseContext();
-               context.set(Parser.class, new ParserDecorator(parser) {
-                       private static final long serialVersionUID = 
4424210691523343833L;
-
-                       @Override
-                       public void parse(InputStream stream, ContentHandler 
handler,
-                                       Metadata metadata, ParseContext context)
-                                       throws IOException, SAXException, 
TikaException {
-                               // custom processing of the component document
-                       }
-               });
-               parser.parse(stream, handler, metadata, context);
-       }
-
-}
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.Reader;
+import java.net.URL;
+import java.nio.CharBuffer;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.CompositeParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.parser.html.HtmlMapper;
+import org.apache.tika.parser.html.HtmlParser;
+import org.apache.tika.parser.html.IdentityHtmlMapper;
+import org.apache.tika.parser.txt.TXTParser;
+import org.apache.tika.parser.xml.XMLParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.LinkContentHandler;
+import org.apache.tika.sax.TeeContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class TIAParsingExample {
+
+       public static String parseToStringExample() throws Exception {
+               File document = new File("example.doc");
+               String content = new Tika().parseToString(document);
+               System.out.print(content);
+               return content;
+       }
+
+       public static void parseToReaderExample() throws Exception {
+               File document = new File("example.doc");
+               Reader reader = new Tika().parse(document);
+               try {
+                       char[] buffer = new char[1000];
+                       int n = reader.read(buffer);
+                       while (n != -1) {
+                               System.out.append(CharBuffer.wrap(buffer, 0, 
n));
+                               n = reader.read(buffer);
+                       }
+               } finally {
+                       reader.close();
+               }
+       }
+
+       public static void parseFileInputStream(String filename) throws 
Exception {
+               Parser parser = new AutoDetectParser();
+               ContentHandler handler = new DefaultHandler();
+               Metadata metadata = new Metadata();
+               ParseContext context = new ParseContext();
+               InputStream stream = new FileInputStream(new File(filename));
+               try {
+                       parser.parse(stream, handler, metadata, context);
+               } finally {
+                       stream.close();
+               }
+       }
+
+       public static void parseURLStream(String address) throws Exception {
+               Parser parser = new AutoDetectParser();
+               ContentHandler handler = new DefaultHandler();
+               Metadata metadata = new Metadata();
+               ParseContext context = new ParseContext();
+               InputStream stream = new GZIPInputStream(new 
URL(address).openStream());
+               try {
+                       parser.parse(stream, handler, metadata, context);
+               } finally {
+                       stream.close();
+               }
+       }
+
+       public static void parseTikaInputStream(String filename) throws 
Exception {
+               Parser parser = new AutoDetectParser();
+               ContentHandler handler = new DefaultHandler();
+               Metadata metadata = new Metadata();
+               ParseContext context = new ParseContext();
+               InputStream stream = TikaInputStream.get(new File(filename));
+               try {
+                       parser.parse(stream, handler, metadata, context);
+               } finally {
+                       stream.close();
+               }
+       }
+
+       public static File tikaInputStreamGetFile(String filename) throws 
Exception {
+               InputStream stream = TikaInputStream.get(new File(filename));
+               try {
+                       TikaInputStream tikaInputStream = 
TikaInputStream.get(stream);
+                       File file = tikaInputStream.getFile();
+                       return file;
+               } finally {
+                       stream.close();
+               }
+       }
+
+       public static void useHtmlParser() throws Exception {
+               InputStream stream = new ByteArrayInputStream(new byte[0]);
+               ContentHandler handler = new DefaultHandler();
+               Metadata metadata = new Metadata();
+               ParseContext context = new ParseContext();
+               Parser parser = new HtmlParser();
+               parser.parse(stream, handler, metadata, context);
+       }
+
+       public static void useCompositeParser() throws Exception {
+               InputStream stream = new ByteArrayInputStream(new byte[0]);
+               ContentHandler handler = new DefaultHandler();
+               ParseContext context = new ParseContext();
+               Map<MediaType, Parser> parsersByType = new HashMap<MediaType, 
Parser>();
+               parsersByType.put(MediaType.parse("text/html"), new 
HtmlParser());
+               parsersByType.put(MediaType.parse("application/xml"), new 
XMLParser());
+
+               CompositeParser parser = new CompositeParser();
+               parser.setParsers(parsersByType);
+               parser.setFallback(new TXTParser());
+
+               Metadata metadata = new Metadata();
+               metadata.set(Metadata.CONTENT_TYPE, "text/html");
+               parser.parse(stream, handler, metadata, context);
+       }
+
+       public static void useAutoDetectParser() throws Exception {
+               InputStream stream = new ByteArrayInputStream(new byte[0]);
+               ContentHandler handler = new DefaultHandler();
+               Metadata metadata = new Metadata();
+               ParseContext context = new ParseContext();
+               Parser parser = new AutoDetectParser();
+               parser.parse(stream, handler, metadata, context);
+       }
+
+       public static void testTeeContentHandler(String filename) throws 
Exception {
+               InputStream stream = new ByteArrayInputStream(new byte[0]);
+               Metadata metadata = new Metadata();
+               ParseContext context = new ParseContext();
+               Parser parser = new AutoDetectParser();
+               LinkContentHandler linkCollector = new LinkContentHandler();
+               OutputStream output = new FileOutputStream(new File(filename));
+               try {
+                       ContentHandler handler = new TeeContentHandler(
+                                       new BodyContentHandler(output), 
linkCollector);
+                       parser.parse(stream, handler, metadata, context);
+               } finally {
+                       output.close();
+               }
+       }
+
+       public static void testLocale() throws Exception {
+               InputStream stream = new ByteArrayInputStream(new byte[0]);
+               ContentHandler handler = new DefaultHandler();
+               Metadata metadata = new Metadata();
+               Parser parser = new AutoDetectParser();
+               ParseContext context = new ParseContext();
+               context.set(Locale.class, Locale.ENGLISH);
+               parser.parse(stream, handler, metadata, context);
+       }
+
+       public static void testHtmlMapper() throws Exception {
+               InputStream stream = new ByteArrayInputStream(new byte[0]);
+               ContentHandler handler = new DefaultHandler();
+               Metadata metadata = new Metadata();
+               Parser parser = new AutoDetectParser();
+               ParseContext context = new ParseContext();
+               context.set(HtmlMapper.class, new IdentityHtmlMapper());
+               parser.parse(stream, handler, metadata, context);
+       }
+
+       public static void testCompositeDocument() throws Exception {
+               InputStream stream = new ByteArrayInputStream(new byte[0]);
+               ContentHandler handler = new DefaultHandler();
+               Metadata metadata = new Metadata();
+               Parser parser = new AutoDetectParser();
+               ParseContext context = new ParseContext();
+               context.set(Parser.class, new ParserDecorator(parser) {
+                       private static final long serialVersionUID = 
4424210691523343833L;
+
+                       @Override
+                       public void parse(InputStream stream, ContentHandler 
handler,
+                                       Metadata metadata, ParseContext context)
+                                       throws IOException, SAXException, 
TikaException {
+                               // custom processing of the component document
+                       }
+               });
+               parser.parse(stream, handler, metadata, context);
+       }
+
+}

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java 
(original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java 
Wed May 13 13:49:36 2015
@@ -1,47 +1,47 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.example;
-
-//JDK imports
-import java.io.IOException;
-import java.util.Collections;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipFile;
-
-/**
- * 
- *
- * Example code listing from Chapter 1. Lists a zip file's entries using JDK's
- * standard APIs.
- *
- */
-public class ZipListFiles {
-       public static void main(String[] args) throws Exception {
-               if (args.length > 0) {
-                       for (String file : args) {
-                               System.out.println("Files in " + file + " 
file:");
-                               listZipEntries(file);
-                       }
-               }
-       }
-
-       public static void listZipEntries(String path) throws IOException {
-               ZipFile zip = new ZipFile(path);
-               for (ZipEntry entry : Collections.list(zip.entries())) {
-                       System.out.println(entry.getName());
-               }
-       }
-
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+//JDK imports
+import java.io.IOException;
+import java.util.Collections;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+/**
+ * 
+ *
+ * Example code listing from Chapter 1. Lists a zip file's entries using JDK's
+ * standard APIs.
+ *
+ */
+public class ZipListFiles {
+       public static void main(String[] args) throws Exception {
+               if (args.length > 0) {
+                       for (String file : args) {
+                               System.out.println("Files in " + file + " 
file:");
+                               listZipEntries(file);
+                       }
+               }
+       }
+
+       public static void listZipEntries(String path) throws IOException {
+               ZipFile zip = new ZipFile(path);
+               for (ZipEntry entry : Collections.list(zip.entries())) {
+                       System.out.println(entry.getName());
+               }
+       }
+
 }
\ No newline at end of file

Modified: 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java
 (original)
+++ 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java
 Wed May 13 13:49:36 2015
@@ -1,52 +1,52 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.example;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.PrintStream;
-
-import junit.framework.Assert;
-
-import org.apache.commons.io.FileUtils;
-import org.junit.Test;
-
-import com.google.common.base.Charsets;
-
-@SuppressWarnings("deprecation")
-public class SimpleTextExtractorTest {
-
-    @Test
-    public void testSimpleTextExtractor() throws Exception {
-        String message =
-            "Hello, World! This is simple UTF-8 text content written"
-            + " in English to test autodetection of the character"
-            + " encoding of the input stream.";
-        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-
-        PrintStream out = System.out;
-        System.setOut(new PrintStream(buffer, true, Charsets.UTF_8.name()));
-
-        File file = new File("target", "test.txt");
-        FileUtils.writeStringToFile(file, message);
-        SimpleTextExtractor.main(new String[] { file.getPath() });
-        file.delete();
-
-        System.setOut(out);
-
-        Assert.assertEquals(message, 
buffer.toString(Charsets.UTF_8.name()).trim());
-    }
-
-}
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+
+import junit.framework.Assert;
+
+import org.apache.commons.io.FileUtils;
+import org.junit.Test;
+
+import com.google.common.base.Charsets;
+
+@SuppressWarnings("deprecation")
+public class SimpleTextExtractorTest {
+
+    @Test
+    public void testSimpleTextExtractor() throws Exception {
+        String message =
+            "Hello, World! This is simple UTF-8 text content written"
+            + " in English to test autodetection of the character"
+            + " encoding of the input stream.";
+        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+        PrintStream out = System.out;
+        System.setOut(new PrintStream(buffer, true, Charsets.UTF_8.name()));
+
+        File file = new File("target", "test.txt");
+        FileUtils.writeStringToFile(file, message);
+        SimpleTextExtractor.main(new String[] { file.getPath() });
+        file.delete();
+
+        System.setOut(out);
+
+        Assert.assertEquals(message, 
buffer.toString(Charsets.UTF_8.name()).trim());
+    }
+
+}

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AccessChecker.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AccessChecker.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AccessChecker.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AccessChecker.java
 Wed May 13 13:49:36 2015
@@ -1,80 +1,80 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.tika.parser.pdf;
-
-import java.io.Serializable;
-
-import org.apache.tika.exception.AccessPermissionException;
-import org.apache.tika.metadata.AccessPermissions;
-import org.apache.tika.metadata.Metadata;
-
-/**
- * Checks whether or not a document allows extraction generally
- * or extraction for accessibility only.
- */
-public class AccessChecker implements Serializable {
-
-    private static final long serialVersionUID = 6492570218190936986L;
-
-    private final boolean needToCheck;
-    private final boolean allowAccessibility;
-
-    /**
-     * This constructs an {@link AccessChecker} that
-     * will not perform any checking and will always return without
-     * throwing an exception.
-     * <p>
-     * This constructor is available to allow for Tika's legacy ( <= v1.7) 
behavior.
-     */
-    public AccessChecker() {
-        needToCheck = false;
-        allowAccessibility = true;
-    }
-    /**
-     * This constructs an {@link AccessChecker} that will check
-     * for whether or not content should be extracted from a document.
-     *
-     * @param allowExtractionForAccessibility if general extraction is not 
allowed, is extraction for accessibility allowed
-     */
-    public AccessChecker(boolean allowExtractionForAccessibility) {
-        needToCheck = true;
-        this.allowAccessibility = allowExtractionForAccessibility;
-    }
-
-    /**
-     * Checks to see if a document's content should be extracted based
-     * on metadata values and the value of {@link #allowAccessibility} in the 
constructor.
-     *
-     * @param metadata
-     * @throws AccessPermissionException if access is not permitted
-     */
-    public void check(Metadata metadata) throws AccessPermissionException {
-        if (!needToCheck) {
-            return;
-        }
-        if ("false".equals(metadata.get(AccessPermissions.EXTRACT_CONTENT))) {
-            if (allowAccessibility) {
-                
if("true".equals(metadata.get(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY))) {
-                    return;
-                }
-                throw new AccessPermissionException("Content extraction for 
accessibility is not allowed.");
-            }
-            throw new AccessPermissionException("Content extraction is not 
allowed.");
-        }
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.pdf;
+
+import java.io.Serializable;
+
+import org.apache.tika.exception.AccessPermissionException;
+import org.apache.tika.metadata.AccessPermissions;
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * Checks whether or not a document allows extraction generally
+ * or extraction for accessibility only.
+ */
+public class AccessChecker implements Serializable {
+
+    private static final long serialVersionUID = 6492570218190936986L;
+
+    private final boolean needToCheck;
+    private final boolean allowAccessibility;
+
+    /**
+     * This constructs an {@link AccessChecker} that
+     * will not perform any checking and will always return without
+     * throwing an exception.
+     * <p>
+     * This constructor is available to allow for Tika's legacy ( <= v1.7) 
behavior.
+     */
+    public AccessChecker() {
+        needToCheck = false;
+        allowAccessibility = true;
+    }
+    /**
+     * This constructs an {@link AccessChecker} that will check
+     * for whether or not content should be extracted from a document.
+     *
+     * @param allowExtractionForAccessibility if general extraction is not 
allowed, is extraction for accessibility allowed
+     */
+    public AccessChecker(boolean allowExtractionForAccessibility) {
+        needToCheck = true;
+        this.allowAccessibility = allowExtractionForAccessibility;
+    }
+
+    /**
+     * Checks to see if a document's content should be extracted based
+     * on metadata values and the value of {@link #allowAccessibility} in the 
constructor.
+     *
+     * @param metadata
+     * @throws AccessPermissionException if access is not permitted
+     */
+    public void check(Metadata metadata) throws AccessPermissionException {
+        if (!needToCheck) {
+            return;
+        }
+        if ("false".equals(metadata.get(AccessPermissions.EXTRACT_CONTENT))) {
+            if (allowAccessibility) {
+                
if("true".equals(metadata.get(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY))) {
+                    return;
+                }
+                throw new AccessPermissionException("Content extraction for 
accessibility is not allowed.");
+            }
+            throw new AccessPermissionException("Content extraction is not 
allowed.");
+        }
+    }
+}

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
 Wed May 13 13:49:36 2015
@@ -14,20 +14,20 @@ package org.apache.tika.parser.pdf;
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.pdfbox.util.PDFTextStripper;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Serializable;
-import java.util.Locale;
-import java.util.Properties;
-
-/**
- * Config for PDFParser.
- * 
+ * limitations under the License.
+ */
+
+import org.apache.pdfbox.util.PDFTextStripper;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Serializable;
+import java.util.Locale;
+import java.util.Properties;
+
+/**
+ * Config for PDFParser.
+ * 
  * This allows parameters to be set programmatically:
  * <ol>
  * <li>Calls to PDFParser, i.e. 
parser.getPDFParserConfig().setEnableAutoSpace() (as before)</li>
@@ -77,14 +77,14 @@ public class PDFParserConfig implements
     //The character width-based tolerance value used to estimate where spaces 
in text should be added
     private Float averageCharTolerance;
     
-    //The space width-based tolerance value used to estimate where spaces in 
text should be added
-    private Float spacingTolerance;
-
-    private AccessChecker accessChecker;
-
-    public PDFParserConfig() {
-        init(this.getClass().getResourceAsStream("PDFParser.properties"));
-    }
+    //The space width-based tolerance value used to estimate where spaces in 
text should be added
+    private Float spacingTolerance;
+
+    private AccessChecker accessChecker;
+
+    public PDFParserConfig() {
+        init(this.getClass().getResourceAsStream("PDFParser.properties"));
+    }
 
     /**
      * Loads properties from InputStream and then tries to close InputStream.
@@ -136,24 +136,24 @@ public class PDFParserConfig implements
         setExtractInlineImages(
                 getProp(props.getProperty("extractInlineImages"),
                 getExtractInlineImages()));
-        setExtractUniqueInlineImagesOnly(
-                getProp(props.getProperty("extractUniqueInlineImagesOnly"),
-                getExtractUniqueInlineImagesOnly()));
-
-        boolean checkExtractAccessPermission = 
getProp(props.getProperty("checkExtractAccessPermission"), false);
-        boolean allowExtractionForAccessibility = 
getProp(props.getProperty("allowExtractionForAccessibility"), true);
-
-        if (checkExtractAccessPermission == false) {
-            //silently ignore the crazy configuration of 
checkExtractAccessPermission = false,
-            //but allowExtractionForAccessibility=false
-            accessChecker = new AccessChecker();
-        } else {
-            accessChecker = new AccessChecker(allowExtractionForAccessibility);
-        }
-    }
-    
-    /**
-     * Configures the given pdf2XHTML.
+        setExtractUniqueInlineImagesOnly(
+                getProp(props.getProperty("extractUniqueInlineImagesOnly"),
+                getExtractUniqueInlineImagesOnly()));
+
+        boolean checkExtractAccessPermission = 
getProp(props.getProperty("checkExtractAccessPermission"), false);
+        boolean allowExtractionForAccessibility = 
getProp(props.getProperty("allowExtractionForAccessibility"), true);
+
+        if (checkExtractAccessPermission == false) {
+            //silently ignore the crazy configuration of 
checkExtractAccessPermission = false,
+            //but allowExtractionForAccessibility=false
+            accessChecker = new AccessChecker();
+        } else {
+            accessChecker = new AccessChecker(allowExtractionForAccessibility);
+        }
+    }
+    
+    /**
+     * Configures the given pdf2XHTML.
      * 
      * @param pdf2XHTML
      */
@@ -342,20 +342,20 @@ public class PDFParserConfig implements
 
     /**
      * See {@link PDFTextStripper#setSpacingTolerance(float)}
-     */
-    public void setSpacingTolerance(Float spacingTolerance) {
-        this.spacingTolerance = spacingTolerance;
-    }
-
-    public void setAccessChecker(AccessChecker accessChecker) {
-        this.accessChecker = accessChecker;
-    }
-
-    public AccessChecker getAccessChecker() {
-        return accessChecker;
-    }
-
-    private boolean getProp(String p, boolean defaultMissing){
+     */
+    public void setSpacingTolerance(Float spacingTolerance) {
+        this.spacingTolerance = spacingTolerance;
+    }
+
+    public void setAccessChecker(AccessChecker accessChecker) {
+        this.accessChecker = accessChecker;
+    }
+
+    public AccessChecker getAccessChecker() {
+        return accessChecker;
+    }
+
+    private boolean getProp(String p, boolean defaultMissing){
         if (p == null){
             return defaultMissing;
         }

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java 
(original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/RarParser.java 
Wed May 13 13:49:36 2015
@@ -1,117 +1,117 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.pkg;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Collections;
-import java.util.Set;
-
-import org.apache.tika.exception.EncryptedDocumentException;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.extractor.EmbeddedDocumentExtractor;
-import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
-import org.apache.tika.io.TemporaryResources;
-import org.apache.tika.io.TikaInputStream;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-import com.github.junrar.Archive;
-import com.github.junrar.exception.RarException;
-import com.github.junrar.rarfile.FileHeader;
-
-/**
- * Parser for Rar files.
- */
-public class RarParser extends AbstractParser {
-    private static final long serialVersionUID = 6157727985054451501L;
-    
-    private static final Set<MediaType> SUPPORTED_TYPES = Collections
-            .singleton(MediaType.application("x-rar-compressed"));
-
-    @Override
-    public Set<MediaType> getSupportedTypes(ParseContext arg0) {
-        return SUPPORTED_TYPES;
-    }
-
-    @Override
-    public void parse(InputStream stream, ContentHandler handler,
-            Metadata metadata, ParseContext context) throws IOException,
-            SAXException, TikaException {
-
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-        xhtml.startDocument();
-
-        EmbeddedDocumentExtractor extractor = context.get(
-                EmbeddedDocumentExtractor.class,
-                new ParsingEmbeddedDocumentExtractor(context));
-
-        TemporaryResources tmp = new TemporaryResources();
-        Archive rar = null;
-        try {
-            TikaInputStream tis = TikaInputStream.get(stream, tmp);
-            rar = new Archive(tis.getFile());
-
-            if (rar.isEncrypted()) {
-                throw new EncryptedDocumentException();
-            }
-
-            //Without this BodyContentHandler does not work
-            xhtml.element("div", " ");
-
-            FileHeader header = rar.nextFileHeader();
-            while (header != null && !Thread.currentThread().isInterrupted()) {
-                if (!header.isDirectory()) {
-                    InputStream subFile = null;
-                    try {
-                        subFile = rar.getInputStream(header);
-
-                        Metadata entrydata = PackageParser.handleEntryMetadata(
-                                
"".equals(header.getFileNameW())?header.getFileNameString():header.getFileNameW(),
-                                header.getCTime(), header.getMTime(),
-                                header.getFullUnpackSize(),
-                                xhtml
-                        );
-
-                        if (extractor.shouldParseEmbedded(entrydata)) {
-                            extractor.parseEmbedded(subFile, handler, 
entrydata, true);
-                        }
-                    } finally {
-                        if (subFile != null)
-                            subFile.close();
-                    }
-                }
-
-                header = rar.nextFileHeader();
-            }
-
-        } catch (RarException e) {
-            throw new TikaException("RarParser Exception", e);
-        } finally {
-            if (rar != null)
-                rar.close();
-            tmp.close();
-        }
-
-        xhtml.endDocument();
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.pkg;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.tika.exception.EncryptedDocumentException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import com.github.junrar.Archive;
+import com.github.junrar.exception.RarException;
+import com.github.junrar.rarfile.FileHeader;
+
+/**
+ * Parser for Rar files.
+ */
+public class RarParser extends AbstractParser {
+    private static final long serialVersionUID = 6157727985054451501L;
+    
+    private static final Set<MediaType> SUPPORTED_TYPES = Collections
+            .singleton(MediaType.application("x-rar-compressed"));
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext arg0) {
+        return SUPPORTED_TYPES;
+    }
+
+    @Override
+    public void parse(InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+
+        EmbeddedDocumentExtractor extractor = context.get(
+                EmbeddedDocumentExtractor.class,
+                new ParsingEmbeddedDocumentExtractor(context));
+
+        TemporaryResources tmp = new TemporaryResources();
+        Archive rar = null;
+        try {
+            TikaInputStream tis = TikaInputStream.get(stream, tmp);
+            rar = new Archive(tis.getFile());
+
+            if (rar.isEncrypted()) {
+                throw new EncryptedDocumentException();
+            }
+
+            //Without this BodyContentHandler does not work
+            xhtml.element("div", " ");
+
+            FileHeader header = rar.nextFileHeader();
+            while (header != null && !Thread.currentThread().isInterrupted()) {
+                if (!header.isDirectory()) {
+                    InputStream subFile = null;
+                    try {
+                        subFile = rar.getInputStream(header);
+
+                        Metadata entrydata = PackageParser.handleEntryMetadata(
+                                
"".equals(header.getFileNameW())?header.getFileNameString():header.getFileNameW(),
+                                header.getCTime(), header.getMTime(),
+                                header.getFullUnpackSize(),
+                                xhtml
+                        );
+
+                        if (extractor.shouldParseEmbedded(entrydata)) {
+                            extractor.parseEmbedded(subFile, handler, 
entrydata, true);
+                        }
+                    } finally {
+                        if (subFile != null)
+                            subFile.close();
+                    }
+                }
+
+                header = rar.nextFileHeader();
+            }
+
+        } catch (RarException e) {
+            throw new TikaException("RarParser Exception", e);
+        } finally {
+            if (rar != null)
+                rar.close();
+            tmp.close();
+        }
+
+        xhtml.endDocument();
+    }
+}

Modified: 
tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties
 (original)
+++ 
tika/trunk/tika-parsers/src/main/resources/org/apache/tika/parser/pdf/PDFParser.properties
 Wed May 13 13:49:36 2015
@@ -18,8 +18,8 @@ extractAnnotationText true
 sortByPosition false
 suppressDuplicateOverlappingText       false
 useNonSequentialParser false
-extractAcroFormContent true
-extractInlineImages false
-extractUniqueInlineImagesOnly true
-checkExtractAccessPermission false
-allowExtractionForAccessibility true
+extractAcroFormContent true
+extractInlineImages false
+extractUniqueInlineImagesOnly true
+checkExtractAccessPermission false
+allowExtractionForAccessibility true

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
 (original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
 Wed May 13 13:49:36 2015
@@ -1,246 +1,246 @@
-package org.apache.tika.parser.mock;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import static junit.framework.TestCase.assertEquals;
-import static junit.framework.TestCase.assertTrue;
-import static org.junit.Assert.fail;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PrintStream;
-import java.util.Date;
-
-import org.apache.tika.TikaTest;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.Parser;
-import org.junit.Test;
-
-public class MockParserTest extends TikaTest {
-    private final static String M = "/test-documents/mock/";
-    private final static Parser PARSER = new AutoDetectParser();
-
-    @Override
-    public XMLResult getXML(String path, Metadata m) throws Exception {
-        //note that this is specific to MockParserTest with addition of M to 
the path!
-        InputStream is = getResourceAsStream(M+path);
-        try {
-            return super.getXML(is, PARSER, m);
-        } finally {
-            IOUtils.closeQuietly(is);
-        }
-    }
-
-    @Test
-    public void testExample() throws Exception {
-        Metadata m = new Metadata();
-        PrintStream out = System.out;
-        PrintStream err = System.err;
-        ByteArrayOutputStream outBos = new ByteArrayOutputStream();
-        ByteArrayOutputStream errBos = new ByteArrayOutputStream();
-        PrintStream tmpOut = new PrintStream(outBos, true, 
IOUtils.UTF_8.toString());
-        PrintStream tmpErr = new PrintStream(errBos, true, 
IOUtils.UTF_8.toString());
-        System.setOut(tmpOut);
-        System.setErr(tmpErr);
-        try {
-            assertThrowable("example.xml", m, IOException.class, "not another 
IOException");
-            assertMockParser(m);
-        } finally {
-            System.setOut(out);
-            System.setErr(err);
-        }
-        String outString = new String(outBos.toByteArray(), IOUtils.UTF_8);
-        assertContains("writing to System.out", outString);
-
-        String errString = new String(errBos.toByteArray(), IOUtils.UTF_8);
-        assertContains("writing to System.err", errString);
-
-    }
-
-    @Test
-    public void testNothingBad() throws Exception {
-        Metadata m = new Metadata();
-        String content = getXML("nothing_bad.xml", m).xml;
-        assertEquals("Geoffrey Chaucer", m.get("author"));
-        assertContains("<p>And bathed every veyne in swich licour,</p>", 
content);
-        assertMockParser(m);
-    }
-
-    @Test
-    public void testNullPointer() throws Exception {
-        Metadata m = new Metadata();
-        assertThrowable("null_pointer.xml", m, NullPointerException.class, 
"another null pointer exception");
-        assertMockParser(m);
-    }
-
-    @Test
-    public void testNullPointerNoMsg() throws Exception {
-        Metadata m = new Metadata();
-        assertThrowable("null_pointer_no_msg.xml", m, 
NullPointerException.class, null);
-        assertMockParser(m);
-    }
-
-
-    @Test
-    public void testSleep() throws Exception {
-        long start = new Date().getTime();
-        Metadata m = new Metadata();
-        String content = getXML("sleep.xml", m).xml;
-        assertMockParser(m);
-        long elapsed = new Date().getTime()-start;
-        //should sleep for at least 3000
-        boolean enoughTimeHasElapsed = elapsed > 2000;
-        assertTrue("not enough time has not elapsed: "+elapsed, 
enoughTimeHasElapsed);
-        assertMockParser(m);
-    }
-
-    @Test
-    public void testHeavyHang() throws Exception {
-        long start = new Date().getTime();
-        Metadata m = new Metadata();
-
-        String content = getXML("heavy_hang.xml", m).xml;
-        assertMockParser(m);
-        long elapsed = new Date().getTime()-start;
-        //should sleep for at least 3000
-        boolean enoughTimeHasElapsed = elapsed > 2000;
-        assertTrue("not enough time has elapsed: "+elapsed, 
enoughTimeHasElapsed);
-        assertMockParser(m);
-    }
-
-    @Test
-    public void testFakeOOM() throws Exception {
-        Metadata m = new Metadata();
-        assertThrowable("fake_oom.xml", m, OutOfMemoryError.class, "not 
another oom");
-        assertMockParser(m);
-    }
-
-    @Test
-    public void testRealOOM() throws Exception {
-        //Note: we're not actually testing the diff between fake and real oom
-        //i.e. by creating child process and setting different -Xmx or
-        //memory profiling.
-        Metadata m = new Metadata();
-        assertThrowable("real_oom.xml", m, OutOfMemoryError.class, "Java heap 
space");
-        assertMockParser(m);
-    }
-
-    @Test
-    public void testInterruptibleSleep() {
-        //Without static initialization of the parser, it can take ~1 second 
after t.start()
-        //before the parser actually calls parse.  This is
-        //just the time it takes to instantiate and call AutoDetectParser, do 
the detection, etc.
-        //This is not thread creation overhead.
-        ParserRunnable r = new ParserRunnable("sleep_interruptible.xml");
-        Thread t = new Thread(r);
-        t.start();
-        long start = new Date().getTime();
-        try {
-            Thread.sleep(1000);
-        } catch (InterruptedException e) {
-            //swallow
-        }
-
-        t.interrupt();
-
-        try {
-            t.join(10000);
-        } catch (InterruptedException e) {
-            //swallow
-        }
-        long elapsed = new Date().getTime()-start;
-        boolean shortEnough = elapsed < 2000;//the xml file specifies 3000
-        assertTrue("elapsed (" + elapsed + " millis) was not short enough", 
shortEnough);
-    }
-
-    @Test
-    public void testNonInterruptibleSleep() {
-        ParserRunnable r = new ParserRunnable("sleep_not_interruptible.xml");
-        Thread t = new Thread(r);
-        t.start();
-        long start = new Date().getTime();
-        try {
-            //make sure that the thread has actually started
-            Thread.sleep(1000);
-        } catch (InterruptedException e) {
-            //swallow
-        }
-        t.interrupt();
-        try {
-            t.join(20000);
-        } catch (InterruptedException e) {
-            //swallow
-        }
-        long elapsed = new Date().getTime()-start;
-        boolean longEnough = elapsed > 3000;//the xml file specifies 3000, 
this sleeps 1000
-        assertTrue("elapsed ("+elapsed+" millis) was not long enough", 
longEnough);
-    }
-
-    private class ParserRunnable implements Runnable {
-        private final String path;
-        ParserRunnable(String path) {
-            this.path = path;
-        }
-        @Override
-        public void run() {
-            Metadata m = new Metadata();
-            try {
-                getXML(path, m);
-            } catch (Exception e) {
-                throw new RuntimeException(e);
-            } finally {
-                assertMockParser(m);
-            }
-        }
-    }
-
-    private void assertThrowable(String path, Metadata m, Class<? extends 
Throwable> expected, String message) {
-
-        try {
-            getXML(path, m);
-        } catch (Throwable t) {
-            //if this is a throwable wrapped in a TikaException, use the cause
-            if (t instanceof TikaException && t.getCause() != null) {
-                t = t.getCause();
-            }
-            if (! (t.getClass().isAssignableFrom(expected))){
-                fail(t.getClass() +" is not assignable from "+expected);
-            }
-            if (message != null) {
-                assertEquals(message, t.getMessage());
-            }
-        }
-    }
-
-    private void assertMockParser(Metadata m) {
-        String[] parsers = m.getValues("X-Parsed-By");
-        //make sure that it was actually parsed by mock.
-        boolean parsedByMock = false;
-        for (String parser : parsers) {
-            if (parser.equals("org.apache.tika.parser.mock.MockParser")) {
-                parsedByMock = true;
-                break;
-            }
-        }
-        assertTrue("mock parser should have been called", parsedByMock);
-    }
-}
+package org.apache.tika.parser.mock;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static junit.framework.TestCase.assertEquals;
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.Date;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.Parser;
+import org.junit.Test;
+
+public class MockParserTest extends TikaTest {
+    private final static String M = "/test-documents/mock/";
+    private final static Parser PARSER = new AutoDetectParser();
+
+    @Override
+    public XMLResult getXML(String path, Metadata m) throws Exception {
+        //note that this is specific to MockParserTest with addition of M to the path!
+        InputStream is = getResourceAsStream(M+path);
+        try {
+            return super.getXML(is, PARSER, m);
+        } finally {
+            IOUtils.closeQuietly(is);
+        }
+    }
+
+    @Test
+    public void testExample() throws Exception {
+        Metadata m = new Metadata();
+        PrintStream out = System.out;
+        PrintStream err = System.err;
+        ByteArrayOutputStream outBos = new ByteArrayOutputStream();
+        ByteArrayOutputStream errBos = new ByteArrayOutputStream();
+        PrintStream tmpOut = new PrintStream(outBos, true, IOUtils.UTF_8.toString());
+        PrintStream tmpErr = new PrintStream(errBos, true, IOUtils.UTF_8.toString());
+        System.setOut(tmpOut);
+        System.setErr(tmpErr);
+        try {
+            assertThrowable("example.xml", m, IOException.class, "not another IOException");
+            assertMockParser(m);
+        } finally {
+            System.setOut(out);
+            System.setErr(err);
+        }
+        String outString = new String(outBos.toByteArray(), IOUtils.UTF_8);
+        assertContains("writing to System.out", outString);
+
+        String errString = new String(errBos.toByteArray(), IOUtils.UTF_8);
+        assertContains("writing to System.err", errString);
+
+    }
+
+    @Test
+    public void testNothingBad() throws Exception {
+        Metadata m = new Metadata();
+        String content = getXML("nothing_bad.xml", m).xml;
+        assertEquals("Geoffrey Chaucer", m.get("author"));
+        assertContains("<p>And bathed every veyne in swich licour,</p>", content);
+        assertMockParser(m);
+    }
+
+    @Test
+    public void testNullPointer() throws Exception {
+        Metadata m = new Metadata();
+        assertThrowable("null_pointer.xml", m, NullPointerException.class, "another null pointer exception");
+        assertMockParser(m);
+    }
+
+    @Test
+    public void testNullPointerNoMsg() throws Exception {
+        Metadata m = new Metadata();
+        assertThrowable("null_pointer_no_msg.xml", m, NullPointerException.class, null);
+        assertMockParser(m);
+    }
+
+
+    @Test
+    public void testSleep() throws Exception {
+        long start = new Date().getTime();
+        Metadata m = new Metadata();
+        String content = getXML("sleep.xml", m).xml;
+        assertMockParser(m);
+        long elapsed = new Date().getTime()-start;
+        //should sleep for at least 3000
+        boolean enoughTimeHasElapsed = elapsed > 2000;
+        assertTrue("not enough time has not elapsed: "+elapsed, enoughTimeHasElapsed);
+        assertMockParser(m);
+    }
+
+    @Test
+    public void testHeavyHang() throws Exception {
+        long start = new Date().getTime();
+        Metadata m = new Metadata();
+
+        String content = getXML("heavy_hang.xml", m).xml;
+        assertMockParser(m);
+        long elapsed = new Date().getTime()-start;
+        //should sleep for at least 3000
+        boolean enoughTimeHasElapsed = elapsed > 2000;
+        assertTrue("not enough time has elapsed: "+elapsed, enoughTimeHasElapsed);
+        assertMockParser(m);
+    }
+
+    @Test
+    public void testFakeOOM() throws Exception {
+        Metadata m = new Metadata();
+        assertThrowable("fake_oom.xml", m, OutOfMemoryError.class, "not another oom");
+        assertMockParser(m);
+    }
+
+    @Test
+    public void testRealOOM() throws Exception {
+        //Note: we're not actually testing the diff between fake and real oom
+        //i.e. by creating child process and setting different -Xmx or
+        //memory profiling.
+        Metadata m = new Metadata();
+        assertThrowable("real_oom.xml", m, OutOfMemoryError.class, "Java heap space");
+        assertMockParser(m);
+    }
+
+    @Test
+    public void testInterruptibleSleep() {
+        //Without static initialization of the parser, it can take ~1 second after t.start()
+        //before the parser actually calls parse.  This is
+        //just the time it takes to instantiate and call AutoDetectParser, do the detection, etc.
+        //This is not thread creation overhead.
+        ParserRunnable r = new ParserRunnable("sleep_interruptible.xml");
+        Thread t = new Thread(r);
+        t.start();
+        long start = new Date().getTime();
+        try {
+            Thread.sleep(1000);
+        } catch (InterruptedException e) {
+            //swallow
+        }
+
+        t.interrupt();
+
+        try {
+            t.join(10000);
+        } catch (InterruptedException e) {
+            //swallow
+        }
+        long elapsed = new Date().getTime()-start;
+        boolean shortEnough = elapsed < 2000;//the xml file specifies 3000
+        assertTrue("elapsed (" + elapsed + " millis) was not short enough", shortEnough);
+    }
+
+    @Test
+    public void testNonInterruptibleSleep() {
+        ParserRunnable r = new ParserRunnable("sleep_not_interruptible.xml");
+        Thread t = new Thread(r);
+        t.start();
+        long start = new Date().getTime();
+        try {
+            //make sure that the thread has actually started
+            Thread.sleep(1000);
+        } catch (InterruptedException e) {
+            //swallow
+        }
+        t.interrupt();
+        try {
+            t.join(20000);
+        } catch (InterruptedException e) {
+            //swallow
+        }
+        long elapsed = new Date().getTime()-start;
+        boolean longEnough = elapsed > 3000;//the xml file specifies 3000, this sleeps 1000
+        assertTrue("elapsed ("+elapsed+" millis) was not long enough", longEnough);
+    }
+
+    private class ParserRunnable implements Runnable {
+        private final String path;
+        ParserRunnable(String path) {
+            this.path = path;
+        }
+        @Override
+        public void run() {
+            Metadata m = new Metadata();
+            try {
+                getXML(path, m);
+            } catch (Exception e) {
+                throw new RuntimeException(e);
+            } finally {
+                assertMockParser(m);
+            }
+        }
+    }
+
+    private void assertThrowable(String path, Metadata m, Class<? extends Throwable> expected, String message) {
+
+        try {
+            getXML(path, m);
+        } catch (Throwable t) {
+            //if this is a throwable wrapped in a TikaException, use the cause
+            if (t instanceof TikaException && t.getCause() != null) {
+                t = t.getCause();
+            }
+            if (! (t.getClass().isAssignableFrom(expected))){
+                fail(t.getClass() +" is not assignable from "+expected);
+            }
+            if (message != null) {
+                assertEquals(message, t.getMessage());
+            }
+        }
+    }
+
+    private void assertMockParser(Metadata m) {
+        String[] parsers = m.getValues("X-Parsed-By");
+        //make sure that it was actually parsed by mock.
+        boolean parsedByMock = false;
+        for (String parser : parsers) {
+            if (parser.equals("org.apache.tika.parser.mock.MockParser")) {
+                parsedByMock = true;
+                break;
+            }
+        }
+        assertTrue("mock parser should have been called", parsedByMock);
+    }
+}

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/AccessCheckerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/AccessCheckerTest.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/AccessCheckerTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/AccessCheckerTest.java Wed May 13 13:49:36 2015
@@ -1,137 +1,137 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.pdf;
-
-
-import static org.junit.Assert.assertTrue;
-
-import org.apache.tika.exception.AccessPermissionException;
-import org.apache.tika.metadata.AccessPermissions;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.PropertyTypeException;
-import org.junit.Test;
-
-public class AccessCheckerTest {
-
-    @Test
-    public void testLegacy() throws AccessPermissionException{
-
-        Metadata m = getMetadata(false, false);
-        //legacy behavior; don't bother checking
-        AccessChecker checker = new AccessChecker();
-        checker.check(m);
-        assertTrue("no exception", true);
-
-        m = getMetadata(false, true);
-        assertTrue("no exception", true);
-        checker.check(m);
-
-        m = getMetadata(true, true);
-        assertTrue("no exception", true);
-        checker.check(m);
-    }
-
-    @Test
-    public void testNoExtraction() {
-
-        Metadata m = null;
-        //allow nothing
-        AccessChecker checker = new AccessChecker(false);
-        boolean ex = false;
-        try {
-            m = getMetadata(false, false);
-            checker.check(m);
-        } catch (AccessPermissionException e) {
-            ex = true;
-        }
-        assertTrue("correct exception with no extraction, no extract for accessibility", ex);
-        ex = false;
-        try {
-            //document allows extraction for accessibility
-            m = getMetadata(false, true);
-            checker.check(m);
-        } catch (AccessPermissionException e) {
-            //but application is not an accessibility application
-            ex = true;
-        }
-        assertTrue("correct exception with no extraction, no extract for accessibility", ex);
-    }
-
-    @Test
-    public void testExtractOnlyForAccessibility() throws AccessPermissionException {
-        Metadata m = getMetadata(false, true);
-        //allow accessibility
-        AccessChecker checker = new AccessChecker(true);
-        checker.check(m);
-        assertTrue("no exception", true);
-        boolean ex = false;
-        try {
-            m = getMetadata(false, false);
-            checker.check(m);
-        } catch (AccessPermissionException e) {
-            ex = true;
-        }
-        assertTrue("correct exception", ex);
-    }
-
-    @Test
-    public void testCrazyExtractNotForAccessibility() throws AccessPermissionException {
-        Metadata m = getMetadata(true, false);
-        //allow accessibility
-        AccessChecker checker = new AccessChecker(true);
-        checker.check(m);
-        assertTrue("no exception", true);
-
-        //don't extract for accessibility
-        checker = new AccessChecker(false);
-        //if extract content is allowed, the checker shouldn't
-        //check the value of extract for accessibility
-        checker.check(m);
-        assertTrue("no exception", true);
-
-    }
-
-    @Test
-    public void testCantAddMultiplesToMetadata() {
-        Metadata m = new Metadata();
-        boolean ex = false;
-        m.add(AccessPermissions.EXTRACT_CONTENT, "true");
-        try {
-            m.add(AccessPermissions.EXTRACT_CONTENT, "false");
-        } catch (PropertyTypeException e) {
-            ex = true;
-        }
-        assertTrue("can't add multiple values", ex);
-
-        m = new Metadata();
-        ex = false;
-        m.add(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY, "true");
-        try {
-            m.add(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY, "false");
-        } catch (PropertyTypeException e) {
-            ex = true;
-        }
-        assertTrue("can't add multiple values", ex);
-    }
-
-    private Metadata getMetadata(boolean allowExtraction, boolean allowExtractionForAccessibility) {
-        Metadata m = new Metadata();
-        m.set(AccessPermissions.EXTRACT_CONTENT, Boolean.toString(allowExtraction));
-        m.set(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY, Boolean.toString(allowExtractionForAccessibility));
-        return m;
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.pdf;
+
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.exception.AccessPermissionException;
+import org.apache.tika.metadata.AccessPermissions;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.PropertyTypeException;
+import org.junit.Test;
+
+public class AccessCheckerTest {
+
+    @Test
+    public void testLegacy() throws AccessPermissionException{
+
+        Metadata m = getMetadata(false, false);
+        //legacy behavior; don't bother checking
+        AccessChecker checker = new AccessChecker();
+        checker.check(m);
+        assertTrue("no exception", true);
+
+        m = getMetadata(false, true);
+        assertTrue("no exception", true);
+        checker.check(m);
+
+        m = getMetadata(true, true);
+        assertTrue("no exception", true);
+        checker.check(m);
+    }
+
+    @Test
+    public void testNoExtraction() {
+
+        Metadata m = null;
+        //allow nothing
+        AccessChecker checker = new AccessChecker(false);
+        boolean ex = false;
+        try {
+            m = getMetadata(false, false);
+            checker.check(m);
+        } catch (AccessPermissionException e) {
+            ex = true;
+        }
+        assertTrue("correct exception with no extraction, no extract for accessibility", ex);
+        ex = false;
+        try {
+            //document allows extraction for accessibility
+            m = getMetadata(false, true);
+            checker.check(m);
+        } catch (AccessPermissionException e) {
+            //but application is not an accessibility application
+            ex = true;
+        }
+        assertTrue("correct exception with no extraction, no extract for accessibility", ex);
+    }
+
+    @Test
+    public void testExtractOnlyForAccessibility() throws AccessPermissionException {
+        Metadata m = getMetadata(false, true);
+        //allow accessibility
+        AccessChecker checker = new AccessChecker(true);
+        checker.check(m);
+        assertTrue("no exception", true);
+        boolean ex = false;
+        try {
+            m = getMetadata(false, false);
+            checker.check(m);
+        } catch (AccessPermissionException e) {
+            ex = true;
+        }
+        assertTrue("correct exception", ex);
+    }
+
+    @Test
+    public void testCrazyExtractNotForAccessibility() throws AccessPermissionException {
+        Metadata m = getMetadata(true, false);
+        //allow accessibility
+        AccessChecker checker = new AccessChecker(true);
+        checker.check(m);
+        assertTrue("no exception", true);
+
+        //don't extract for accessibility
+        checker = new AccessChecker(false);
+        //if extract content is allowed, the checker shouldn't
+        //check the value of extract for accessibility
+        checker.check(m);
+        assertTrue("no exception", true);
+
+    }
+
+    @Test
+    public void testCantAddMultiplesToMetadata() {
+        Metadata m = new Metadata();
+        boolean ex = false;
+        m.add(AccessPermissions.EXTRACT_CONTENT, "true");
+        try {
+            m.add(AccessPermissions.EXTRACT_CONTENT, "false");
+        } catch (PropertyTypeException e) {
+            ex = true;
+        }
+        assertTrue("can't add multiple values", ex);
+
+        m = new Metadata();
+        ex = false;
+        m.add(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY, "true");
+        try {
+            m.add(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY, "false");
+        } catch (PropertyTypeException e) {
+            ex = true;
+        }
+        assertTrue("can't add multiple values", ex);
+    }
+
+    private Metadata getMetadata(boolean allowExtraction, boolean allowExtractionForAccessibility) {
+        Metadata m = new Metadata();
+        m.set(AccessPermissions.EXTRACT_CONTENT, Boolean.toString(allowExtraction));
+        m.set(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY, Boolean.toString(allowExtractionForAccessibility));
+        return m;
+    }
+}

svn commit: r1679211 [4/7] - in /tika/trunk: tika-app/src/main/java/org/apache/tika/cli/ tika-app/src/main/resources/ tika-app/src/test/java/org/apache/tika/cli/ tika-app/src/test/resources/ tika-batch/src/main/java/org/apache/tika/batch/ tika-batch/sr...

Reply via email to