Author: bob
Date: Mon Dec 28 23:22:46 2015
New Revision: 1722029

URL: http://svn.apache.org/viewvc?rev=1722029&view=rev
Log:
TIKA-1812 - Moving multimedia sources to module.

Added:
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/audio/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/audio/AudioParser.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/audio/MidiParser.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/BPGParser.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageParser.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/MetadataFields.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/PSDParser.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/TiffParser.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/WebPParser.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/xmp/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/jpeg/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/CompositeTagHandler.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MP3Frame.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MpegStream.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp4/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp4/DirectFileReadDataSource.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/video/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/video/FLVParser.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/audio/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/audio/AudioParserTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/audio/MidiParserTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/BPGParserTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/MetadataFieldsTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/TiffParserTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/WebPParserTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/xmp/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/image/xmp/JempboxExtractorTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/jpeg/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/mp3/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/mp3/MpegStreamTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/mp4/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/video/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/parser/video/FLVParserTest.java
Removed:
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/module/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/java/org/apache/tika/module/
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/resources/hadoop.jpg
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/resources/testFLV.flv
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/resources/testMID.mid
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/resources/testMP3i18n.mp3
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/test/resources/tika.png
    tika/branches/2.x/tika-parsers/src/main/java/org/apache/tika/parser/audio/
    tika/branches/2.x/tika-parsers/src/main/java/org/apache/tika/parser/image/
    tika/branches/2.x/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/
    tika/branches/2.x/tika-parsers/src/main/java/org/apache/tika/parser/mp3/
    tika/branches/2.x/tika-parsers/src/main/java/org/apache/tika/parser/mp4/
    tika/branches/2.x/tika-parsers/src/main/java/org/apache/tika/parser/video/
    tika/branches/2.x/tika-parsers/src/test/java/org/apache/tika/parser/audio/
    tika/branches/2.x/tika-parsers/src/test/java/org/apache/tika/parser/image/
    tika/branches/2.x/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/
    tika/branches/2.x/tika-parsers/src/test/java/org/apache/tika/parser/mp3/
    tika/branches/2.x/tika-parsers/src/test/java/org/apache/tika/parser/mp4/
    tika/branches/2.x/tika-parsers/src/test/java/org/apache/tika/parser/video/
Modified:
    tika/branches/2.x/tika-parser-modules/pom.xml
    tika/branches/2.x/tika-parser-modules/tika-multimedia-module/pom.xml
    
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser

Modified: tika/branches/2.x/tika-parser-modules/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/pom.xml?rev=1722029&r1=1722028&r2=1722029&view=diff
==============================================================================
--- tika/branches/2.x/tika-parser-modules/pom.xml (original)
+++ tika/branches/2.x/tika-parser-modules/pom.xml Mon Dec 28 23:22:46 2015
@@ -33,59 +33,28 @@
   <name>Apache Tika Parser Modules</name>
   <url>http://tika.apache.org/</url>
   
-  <modules>
+  <properties>
+    <poi.version>3.13</poi.version>
+    <!-- NOTE: sync codec version with POI -->
+    <codec.version>1.9</codec.version>
+    <pdfbox.version>1.8.10</pdfbox.version>
+  </properties>
+  
+  <!-- <modules>
     <module>tika-multimedia-module</module>
   </modules>
-  
+   -->
   <dependencies>
-    <!-- Optional OSGi dependencies, used only when running within OSGi -->
-    <dependency>
-      <groupId>org.osgi</groupId>
-      <artifactId>org.osgi.core</artifactId>
-      <scope>provided</scope>
-      <optional>true</optional>
-    </dependency>
-    <dependency>
-      <groupId>org.osgi</groupId>
-      <artifactId>org.osgi.compendium</artifactId>
-      <scope>provided</scope>
-      <optional>true</optional>
-    </dependency>
-    <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>tika-parsers</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
     <!-- Test dependencies -->
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
-     <dependency>
-      <groupId>org.ops4j.pax.exam</groupId>
-      <artifactId>pax-exam-junit4</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.ops4j.pax.exam</groupId>
-      <artifactId>pax-exam-container-native</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.felix</groupId>
-      <artifactId>org.apache.felix.framework</artifactId>
-      <scope>test</scope>
-    </dependency>
     <dependency>
-      <groupId>org.ops4j.pax.exam</groupId>
-      <artifactId>pax-exam-link-assembly</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.ops4j.pax.url</groupId>
-      <artifactId>pax-url-aether</artifactId>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <version>1.7</version>
       <scope>test</scope>
     </dependency>
     <dependency>
@@ -93,10 +62,37 @@
       <artifactId>slf4j-simple</artifactId>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>javax.inject</groupId>
-      <artifactId>javax.inject</artifactId>
-      <scope>test</scope>
-    </dependency>
   </dependencies> 
+  <build>
+    <pluginManagement>
+      <plugins>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-dependency-plugin</artifactId>
+          <version>2.10</version>
+          <executions>
+            <execution>
+              <id>unpack</id>
+              <phase>compile</phase>
+              <goals>
+                <goal>unpack</goal>
+              </goals>
+              <configuration>
+                <artifactItems>
+                  <artifactItem>
+                    <groupId>${project.groupId}</groupId>
+                    <artifactId>tika-parser-test</artifactId>
+                    <version>${project.version}</version>
+                    <type>jar</type>
+                    <overWrite>true</overWrite>
+                    <outputDirectory>${project.build.testOutputDirectory}</outputDirectory>
+                  </artifactItem>
+                </artifactItems>
+              </configuration>
+            </execution>
+          </executions>
+        </plugin>
+      </plugins>
+    </pluginManagement>
+  </build>
 </project>
\ No newline at end of file

Modified: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/pom.xml?rev=1722029&r1=1722028&r2=1722029&view=diff
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/pom.xml (original)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/pom.xml Mon Dec 28 23:22:46 2015
@@ -20,74 +20,91 @@
   </parent>
 
   <artifactId>tika-multimedia-module</artifactId>
-  <packaging>bundle</packaging>
   <name>Apache Tika Multimedia Module</name>
   <url>http://tika.apache.org/</url>
+  
+  <properties>
+    <metadata.extractor.version>2.8.0</metadata.extractor.version>
+    <isoparser.version>1.0.2</isoparser.version>
+    <commons.logging.version>1.1.3</commons.logging.version>
+  </properties>
+  
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
 
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.drewnoakes</groupId>
+      <artifactId>metadata-extractor</artifactId>
+      <version>${metadata.extractor.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+      <version>${codec.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>${commons.io.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.poi</groupId>
+      <artifactId>poi</artifactId>
+      <version>${poi.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.poi</groupId>
+      <artifactId>poi-scratchpad</artifactId>
+      <version>${poi.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.poi</groupId>
+      <artifactId>poi-ooxml</artifactId>
+      <version>${poi.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>stax</groupId>
+          <artifactId>stax-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>xml-apis</groupId>
+          <artifactId>xml-apis</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.googlecode.mp4parser</groupId>
+      <artifactId>isoparser</artifactId>
+      <version>${isoparser.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.pdfbox</groupId>
+      <artifactId>jempbox</artifactId>
+      <version>${pdfbox.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-logging</groupId>
+      <artifactId>commons-logging</artifactId>
+      <version>${commons.logging.version}</version>
+    </dependency>
+  </dependencies>
+  
   <build>
     <plugins>
       <plugin>
-        <groupId>org.apache.felix</groupId>
-        <artifactId>maven-bundle-plugin</artifactId>
-        <extensions>true</extensions>
-        <configuration>
-          <instructions>
-            <Bundle-Activator>org.apache.tika.module.multimedia.internal.Activator</Bundle-Activator>
-            <_runsystempackages>com.sun.xml.bind.marshaller,
-              com.sun.xml.internal.bind.marshaller</_runsystempackages>
-            <Embed-Dependency>
-              metadata-extractor,
-              xmpcore,
-              commons-codec,
-              commons-io,
-              jempbox,
-              poi,
-              isoparser,
-              aspectjrt,
-              tika-parsers;inline=org/apache/tika/parser/image/**|org/apache/tika/parser/jpeg/**|org/apache/tika/parser/ocr/**|org/apache/tika/parser/audio/**|org/apache/tika/parser/video/**|org/apache/tika/parser/mp3/**|org/apache/tika/parser/mp4/**
-            </Embed-Dependency>
-            <Embed-Transitive>true</Embed-Transitive>
-            <Export-Package>
-              org.apache.tika.parser.image.*,
-              org.apache.tika.parser.jpeg.*,
-              org.apache.tika.parser.ocr.*,
-              org.apache.tika.parser.audio.*,
-              org.apache.tika.parser.video.*,
-              org.apache.tika.parser.mp3.*,
-              org.apache.tika.parser.mp4.*
-            </Export-Package>
-            <Import-Package>
-              *,
-              com.adobe.xmp;resolution:=optional,
-              com.adobe.xmp.properties;resolution:=optional,
-              android.util;resolution:=optional
-            </Import-Package>
-          </instructions>
-        </configuration>
-      </plugin>
-      <plugin>
-        <artifactId>maven-failsafe-plugin</artifactId>
-        <executions>
-          <execution>
-            <goals>
-              <goal>integration-test</goal>
-              <goal>verify</goal>
-            </goals>
-          </execution>
-        </executions>
-        <configuration>
-          <systemPropertyVariables>
-            <org.ops4j.pax.logging.DefaultServiceLog.level>
-              WARN
-            </org.ops4j.pax.logging.DefaultServiceLog.level>
-          </systemPropertyVariables>
-          <systemProperties>
-            <property>
-              <name>project.bundle.file</name>
-              <value>target/${project.build.finalName}.jar</value>
-            </property>
-          </systemProperties>
-        </configuration>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-dependency-plugin</artifactId>
       </plugin>
     </plugins>
   </build>

Added: 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/audio/AudioParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/audio/AudioParser.java?rev=1722029&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/audio/AudioParser.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/audio/AudioParser.java Mon Dec 28 23:22:46 2015
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.audio;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import javax.sound.sampled.AudioFileFormat;
+import javax.sound.sampled.AudioFileFormat.Type;
+import javax.sound.sampled.AudioFormat;
+import javax.sound.sampled.AudioSystem;
+import javax.sound.sampled.UnsupportedAudioFileException;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.XMPDM;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for the sampled-audio formats identified through
+ * {@code javax.sound.sampled}: AU/SND ({@code audio/basic}),
+ * WAVE ({@code audio/x-wav}) and AIFF/AIFC ({@code audio/x-aiff}).
+ * <p>
+ * Only metadata is extracted; nothing is written between the start and
+ * the end of the XHTML document.
+ */
+public class AudioParser extends AbstractParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = -6015684081240882695L;
+
+    /** Media types handled by this parser. */
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+                MediaType.audio("basic"),
+                MediaType.audio("x-wav"),
+                MediaType.audio("x-aiff"))));
+
+    /**
+     * Returns the media types handled by this parser.
+     *
+     * @param context parse context (not consulted)
+     * @return unmodifiable set of supported media types
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Reads the audio file header and records format details as metadata.
+     * <p>
+     * The content type is set from the detected file type, followed by
+     * channel count, sample rate, sample size and encoding, plus any
+     * properties exposed by the file format and audio format objects.
+     * If the audio system does not recognise the stream, no metadata is
+     * added. An XHTML document is started and ended in either case.
+     *
+     * @param stream   audio data; wrapped in a buffer if it lacks mark support
+     * @param handler  receiver of the XHTML SAX events
+     * @param metadata metadata object to populate
+     * @param context  parse context (not consulted)
+     * @throws IOException  if reading the stream fails
+     * @throws SAXException if the handler rejects an event
+     * @throws TikaException declared in the signature; not raised directly
+     *                       by this method
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        // AudioSystem expects the stream to support the mark feature
+        if (!stream.markSupported()) {
+            stream = new BufferedInputStream(stream);
+        }
+        try {
+            AudioFileFormat fileFormat = AudioSystem.getAudioFileFormat(stream);
+            Type type = fileFormat.getType();
+            if (type == Type.AIFC || type == Type.AIFF) {
+                metadata.set(Metadata.CONTENT_TYPE, "audio/x-aiff");
+            } else if (type == Type.AU || type == Type.SND) {
+                metadata.set(Metadata.CONTENT_TYPE, "audio/basic");
+            } else if (type == Type.WAVE) {
+                metadata.set(Metadata.CONTENT_TYPE, "audio/x-wav");
+            }
+
+            // Each numeric property is recorded only when the audio
+            // system reports an actual value rather than NOT_SPECIFIED.
+            AudioFormat audioFormat = fileFormat.getFormat();
+            int channels = audioFormat.getChannels();
+            if (channels != AudioSystem.NOT_SPECIFIED) {
+                metadata.set("channels", String.valueOf(channels));
+                // TODO: Use XMPDM.TRACKS? (see also frame rate in AudioFormat)
+            }
+            float rate = audioFormat.getSampleRate();
+            if (rate != AudioSystem.NOT_SPECIFIED) {
+                metadata.set("samplerate", String.valueOf(rate));
+                // The XMPDM property receives the rate truncated to an integer
+                metadata.set(
+                        XMPDM.AUDIO_SAMPLE_RATE,
+                        Integer.toString((int) rate));
+            }
+            int bits = audioFormat.getSampleSizeInBits();
+            if (bits != AudioSystem.NOT_SPECIFIED) {
+                metadata.set("bits", String.valueOf(bits));
+                // Only 8, 16 and 32 bit sizes are mapped to an XMPDM sample type
+                if (bits == 8) {
+                    metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "8Int");
+                } else if (bits == 16) {
+                    metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "16Int");
+                } else if (bits == 32) {
+                    metadata.set(XMPDM.AUDIO_SAMPLE_TYPE, "32Int");
+                }
+            }
+            metadata.set("encoding", audioFormat.getEncoding().toString());
+
+            // Javadoc suggests that some of the following properties might
+            // be available, but I had no success in finding any:
+
+            // "duration" Long playback duration of the file in microseconds
+            // "author" String name of the author of this file
+            // "title" String title of this file
+            // "copyright" String copyright message
+            // "date" Date date of the recording or release
+            // "comment" String an arbitrary text
+
+            addMetadata(metadata, fileFormat.properties());
+            addMetadata(metadata, audioFormat.properties());
+        } catch (UnsupportedAudioFileException e) {
+            // There is no way to know whether this exception was
+            // caused by the document being corrupted or by the format
+            // just being unsupported. So we do nothing.
+        }
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+    /**
+     * Copies every non-null property value into the metadata, using the
+     * property key as the metadata name and {@code toString()} of the value.
+     *
+     * @param metadata   metadata object to populate
+     * @param properties property map to copy; {@code null} is ignored
+     */
+    private void addMetadata(Metadata metadata, Map<String, Object> properties) {
+        if (properties != null) {
+            for (Entry<String, Object> entry : properties.entrySet()) {
+                Object value = entry.getValue();
+                if (value != null) {
+                    metadata.set(entry.getKey(), value.toString());
+                }
+            }
+        }
+    }
+
+}

Added: 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/audio/MidiParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/audio/MidiParser.java?rev=1722029&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/audio/MidiParser.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/audio/MidiParser.java Mon Dec 28 23:22:46 2015
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.audio;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import javax.sound.midi.InvalidMidiDataException;
+import javax.sound.midi.MetaMessage;
+import javax.sound.midi.MidiMessage;
+import javax.sound.midi.MidiSystem;
+import javax.sound.midi.Patch;
+import javax.sound.midi.Sequence;
+import javax.sound.midi.Track;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+
+/**
+ * Parser for MIDI files, read through {@code javax.sound.midi}.
+ * <p>
+ * Records the number of tracks, the number of patches and the timing
+ * division type as metadata, and writes the text carried by each track's
+ * meta events as one XHTML paragraph per track.
+ */
+public class MidiParser extends AbstractParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = 6343278584336189432L;
+
+    /** Media types handled by this parser. */
+    private static final Set<MediaType> SUPPORTED_TYPES =
+        Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+                MediaType.application("x-midi"),
+                MediaType.audio("midi"))));
+
+    /**
+     * Returns the media types handled by this parser.
+     *
+     * @param context parse context (not consulted)
+     * @return unmodifiable set of supported media types
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Parses a MIDI sequence from the stream.
+     * <p>
+     * The content type is always set to {@code audio/midi}. If the data
+     * cannot be read as a MIDI sequence, no further metadata or text is
+     * produced, but the XHTML document is still started and ended.
+     *
+     * @param stream   MIDI data; always wrapped in a buffer before reading
+     * @param handler  receiver of the XHTML SAX events
+     * @param metadata metadata object to populate
+     * @param context  parse context (not consulted)
+     * @throws IOException  if reading the stream fails
+     * @throws SAXException if the handler rejects an event
+     * @throws TikaException declared in the signature; not raised directly
+     *                       by this method
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        metadata.set(Metadata.CONTENT_TYPE, "audio/midi");
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+
+        // MidiSystem expects the stream to support the mark feature
+        InputStream buffered = new BufferedInputStream(stream);
+        try {
+            Sequence sequence = MidiSystem.getSequence(buffered);
+
+            Track[] tracks = sequence.getTracks();
+            metadata.set("tracks", String.valueOf(tracks.length));
+            // TODO: Use XMPDM.TRACKS?
+
+            Patch[] patches = sequence.getPatchList();
+            metadata.set("patches", String.valueOf(patches.length));
+
+            float type = sequence.getDivisionType();
+            if (type == Sequence.PPQ) {
+                metadata.set("divisionType", "PPQ");
+            } else if (type == Sequence.SMPTE_24) {
+                metadata.set("divisionType", "SMPTE_24");
+            } else if (type == Sequence.SMPTE_25) {
+                metadata.set("divisionType", "SMPTE_25");
+            } else if (type == Sequence.SMPTE_30) {
+                metadata.set("divisionType", "SMPTE_30");
+            } else if (type == Sequence.SMPTE_30DROP) {
+                metadata.set("divisionType", "SMPTE_30DROP");
+            } else {
+                // Fallback for a division type that matches none of the
+                // named constants: record its raw numeric value. This
+                // branch previously re-tested SMPTE_24 and so never ran.
+                metadata.set("divisionType", String.valueOf(type));
+            }
+
+            for (Track track : tracks) {
+                xhtml.startElement("p");
+                for (int i = 0; i < track.size(); i++) {
+                    MidiMessage message = track.get(i).getMessage();
+                    if (message instanceof MetaMessage) {
+                        MetaMessage meta = (MetaMessage) message;
+                        // Types 1-15 are reserved for text events
+                        if (meta.getType() >= 1 && meta.getType() <= 15) {
+                            // FIXME: What's the encoding?
+                            xhtml.characters(
+                                    new String(meta.getData(), ISO_8859_1));
+                        }
+                    }
+                }
+                xhtml.endElement("p");
+            }
+        } catch (InvalidMidiDataException ignore) {
+            // There is no way to know whether this exception was
+            // caused by the document being corrupted or by the format
+            // just being unsupported. So we do nothing.
+        }
+
+        xhtml.endDocument();
+    }
+
+}

Added: 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/BPGParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/BPGParser.java?rev=1722029&view=auto
==============================================================================
--- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/BPGParser.java (added)
+++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/BPGParser.java Mon Dec 28 23:22:46 2015
@@ -0,0 +1,177 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.EndianUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Photoshop;
+import org.apache.tika.metadata.TIFF;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for the Better Portable Graphics (BPG) File Format.
+ * <p/>
+ * Documentation on the file format is available from
+ * http://bellard.org/bpg/bpg_spec.txt
+ */
+public class BPGParser extends AbstractParser {
+    protected static final int EXTENSION_TAG_EXIF = 1;
+    protected static final int EXTENSION_TAG_ICC_PROFILE = 2;
+    protected static final int EXTENSION_TAG_XMP = 3;
+    protected static final int EXTENSION_TAG_THUMBNAIL = 4;
+    private static final long serialVersionUID = -161736541253892772L;
+    private static final Set<MediaType> SUPPORTED_TYPES =
+            Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+                    MediaType.image("x-bpg"), MediaType.image("bpg"))));
+
+    /**
+     * Returns the media types this parser handles.
+     *
+     * @param context the parse context; not consulted
+     * @return the shared unmodifiable set built from
+     *         {@code MediaType.image("x-bpg")} and {@code MediaType.image("bpg")}
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Reads the BPG header and any EXIF / XMP extension blocks and records
+     * what it finds in the supplied metadata. No text is extracted; an empty
+     * XHTML document is emitted once the header has been processed.
+     *
+     * @param stream   the BPG data, positioned at the magic signature
+     * @param handler  receives the (empty) XHTML document
+     * @param metadata receives bit depth, colour mode, image dimensions and
+     *                 whatever the EXIF / XMP extensions contribute
+     * @param context  the parse context; not used here
+     * @throws IOException   if reading fails, or the stream ends while an
+     *                       unrecognised extension is being skipped
+     * @throws SAXException  if the handler rejects the document events
+     * @throws TikaException if the signature is wrong, the two fixed header
+     *                       bytes are missing, or an extension declares a
+     *                       length that does not fit the extension area
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        // Check for the magic header signature: 'B' 'P' 'G' 0xfb
+        byte[] signature = new byte[4];
+        IOUtils.readFully(stream, signature);
+        if (signature[0] != (byte) 'B' || signature[1] != (byte) 'P' ||
+                signature[2] != (byte) 'G' || signature[3] != (byte) 0xfb) {
+            throw new TikaException("BPG magic signature invalid");
+        }
+
+        // Grab and decode the first byte
+        int pdf = stream.read();
+        if (pdf < 0) {
+            // read() returned EOF; decoding -1 would only produce bogus metadata
+            throw new TikaException("BPG header truncated");
+        }
+
+        // Pixel format (Greyscale / 4:2:0 / 4:2:2 / 4:4:4) is held in (pdf & 0x7)
+        // TODO Identify a suitable metadata key for this
+
+        // Is there an alpha plane as well as a colour plane?
+        boolean hasAlphaPlane1 = (pdf & 0x8) == 0x8;
+        // TODO Identify a suitable metadata key for this+hasAlphaPlane2
+
+        // Bit depth minus 8
+        int bitDepth = (pdf >> 4) + 8;
+        metadata.set(TIFF.BITS_PER_SAMPLE, Integer.toString(bitDepth));
+
+        // Grab and decode the second byte
+        int cer = stream.read();
+        if (cer < 0) {
+            throw new TikaException("BPG header truncated");
+        }
+
+        // Colour Space: YCbCr / RGB / YCgCo / YCbCrK / CMYK
+        // The mask has to be 0x0F (the low four bits). Bit 4 (value 16) is the
+        // extensions flag tested below, so the former mask of 0x15 both pulled
+        // that flag in and dropped bits 1 and 3 of the colour space.
+        int colourSpace = cer & 0x0F;
+        switch (colourSpace) {
+            case 0:
+                metadata.set(Photoshop.COLOR_MODE, "YCbCr Colour");
+                break;
+            case 1:
+                metadata.set(Photoshop.COLOR_MODE, "RGB Colour");
+                break;
+            case 2:
+                metadata.set(Photoshop.COLOR_MODE, "YCgCo Colour");
+                break;
+            case 3:
+                metadata.set(Photoshop.COLOR_MODE, "YCbCrK Colour");
+                break;
+            case 4:
+                metadata.set(Photoshop.COLOR_MODE, "CMYK Colour");
+                break;
+        }
+
+        // Are there extensions or not?
+        boolean hasExtensions = (cer & 16) == 16;
+
+        // Is the Alpha Plane 2 flag set?
+        boolean hasAlphaPlane2 = (cer & 32) == 32;
+
+        // cer then holds 2 more booleans - limited range, reserved
+
+        // Width and height next
+        int width = (int) EndianUtils.readUE7(stream);
+        int height = (int) EndianUtils.readUE7(stream);
+        metadata.set(TIFF.IMAGE_LENGTH, height);
+        metadata.set(TIFF.IMAGE_WIDTH, width);
+
+        // Picture Data length (value unused, but the field must be consumed)
+        EndianUtils.readUE7(stream);
+
+        // Extension Data Length, if extensions present
+        long extensionDataLength = 0;
+        if (hasExtensions) {
+            extensionDataLength = EndianUtils.readUE7(stream);
+        }
+
+        // Alpha Data Length, if alpha used (value unused, but must be consumed)
+        if (hasAlphaPlane1 || hasAlphaPlane2) {
+            EndianUtils.readUE7(stream);
+        }
+
+        // Extension Data
+        if (hasExtensions) {
+            long extensionsDataSeen = 0;
+            ImageMetadataExtractor metadataExtractor =
+                    new ImageMetadataExtractor(metadata);
+
+            while (extensionsDataSeen < extensionDataLength) {
+                long extensionType = EndianUtils.readUE7(stream);
+                long extensionLength = EndianUtils.readUE7(stream);
+
+                // The tag and length fields are part of the extension area too.
+                // Counting only the payload (as before) left the loop running
+                // past the last extension, and made no progress at all on
+                // zero-length entries.
+                // NOTE(review): relies on the BPG spec defining the extension
+                // data length as covering tag + length + payload - confirm.
+                extensionsDataSeen += ue7Size(extensionType) + ue7Size(extensionLength);
+
+                long remaining = extensionDataLength - extensionsDataSeen;
+                if (extensionLength < 0 || extensionLength > remaining
+                        || extensionLength > Integer.MAX_VALUE) {
+                    // Refuse to allocate or skip on the word of a corrupt length
+                    throw new TikaException(
+                            "BPG extension length invalid: " + extensionLength);
+                }
+
+                switch ((int) extensionType) {
+                    case EXTENSION_TAG_EXIF:
+                        metadataExtractor.parseRawExif(stream, (int) extensionLength, true);
+                        break;
+                    case EXTENSION_TAG_XMP:
+                        handleXMP(stream, (int) extensionLength, metadataExtractor);
+                        break;
+                    default:
+                        skipFully(stream, extensionLength);
+                }
+                extensionsDataSeen += extensionLength;
+            }
+        }
+
+        // HEVC Header + Data
+        // Alpha HEVC Header + Data
+        // We can't do anything with these parts
+
+        // We don't have any helpful text, sorry...
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+    /**
+     * Number of bytes a value occupies when stored with seven payload bits
+     * per byte (at least one byte).
+     * NOTE(review): assumes the shortest possible ue7 encoding was used -
+     * confirm against EndianUtils.readUE7 and the BPG spec.
+     */
+    private static int ue7Size(long value) {
+        int size = 1;
+        while ((value >>>= 7) != 0) {
+            size++;
+        }
+        return size;
+    }
+
+    /**
+     * Skips exactly {@code count} bytes. InputStream.skip may skip fewer
+     * bytes than asked for, so it is retried until done.
+     *
+     * @throws IOException if the stream ends before all bytes were skipped
+     */
+    private static void skipFully(InputStream stream, long count) throws IOException {
+        long remaining = count;
+        while (remaining > 0) {
+            long skipped = stream.skip(remaining);
+            if (skipped <= 0) {
+                // skip() may return 0 without being at EOF; a single read()
+                // tells the two situations apart
+                if (stream.read() < 0) {
+                    throw new IOException(
+                            "Unexpected end of BPG stream while skipping extension data");
+                }
+                skipped = 1;
+            }
+            remaining -= skipped;
+        }
+    }
+
+    /**
+     * Reads an XMP packet of the given size from the stream and passes the
+     * raw bytes to the extractor.
+     *
+     * @param stream    source of the packet bytes
+     * @param xmpLength number of bytes to read
+     * @param extractor receives the packet through {@code parseRawXMP}
+     */
+    protected void handleXMP(InputStream stream, int xmpLength,
+                             ImageMetadataExtractor extractor)
+            throws IOException, TikaException, SAXException {
+        final byte[] packet = new byte[xmpLength];
+        IOUtils.readFully(stream, packet);
+        extractor.parseRawXMP(packet);
+    }
+}

Added: 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java?rev=1722029&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
 Mon Dec 28 23:22:46 2015
@@ -0,0 +1,548 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.drew.imaging.jpeg.JpegMetadataReader;
+import com.drew.imaging.jpeg.JpegProcessingException;
+import com.drew.imaging.riff.RiffProcessingException;
+import com.drew.imaging.tiff.TiffMetadataReader;
+import com.drew.imaging.tiff.TiffProcessingException;
+import com.drew.imaging.webp.WebpMetadataReader;
+import com.drew.lang.ByteArrayReader;
+import com.drew.lang.GeoLocation;
+import com.drew.lang.Rational;
+import com.drew.metadata.Directory;
+import com.drew.metadata.MetadataException;
+import com.drew.metadata.Tag;
+import com.drew.metadata.exif.ExifIFD0Directory;
+import com.drew.metadata.exif.ExifReader;
+import com.drew.metadata.exif.ExifSubIFDDirectory;
+import com.drew.metadata.exif.ExifThumbnailDirectory;
+import com.drew.metadata.exif.GpsDirectory;
+import com.drew.metadata.iptc.IptcDirectory;
+import com.drew.metadata.jpeg.JpegCommentDirectory;
+import com.drew.metadata.jpeg.JpegDirectory;
+import com.drew.metadata.xmp.XmpReader;
+import org.apache.poi.util.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.IPTC;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.xml.sax.SAXException;
+
+/**
+ * Uses the <a href="http://www.drewnoakes.com/code/exif/">Metadata Extractor</a> library
+ * to read EXIF and IPTC image metadata and map to Tika fields.
+ * <p/>
+ * As of 2.4.0 the library supports jpeg and tiff.
+ * As of 2.8.0 the library supports webp.
+ */
+public class ImageMetadataExtractor {
+
+    private static final String GEO_DECIMAL_FORMAT_STRING = "#.######"; // 6 
dp seems to be reasonable
+    private final Metadata metadata;
+    private DirectoryHandler[] handlers;
+
+    /**
+     * Creates an extractor that uses the default directory handlers, applied
+     * in this order: unknown-field copy, JPEG comment, EXIF, dimensions,
+     * geotag, IPTC.
+     *
+     * @param metadata to extract to, using default directory handlers
+     */
+    public ImageMetadataExtractor(Metadata metadata) {
+        this(metadata,
+                new CopyUnknownFieldsHandler(),
+                new JpegCommentHandler(),
+                new ExifHandler(),
+                new DimensionsHandler(),
+                new GeotagHandler(),
+                new IptcHandler()
+        );
+    }
+
+    /**
+     * Creates an extractor with an explicit list of directory handlers.
+     * The array is stored as passed in, without copying.
+     *
+     * @param metadata to extract to
+     * @param handlers handlers in order, note that handlers may override
+     *                 values from earlier handlers
+     */
+    public ImageMetadataExtractor(Metadata metadata, DirectoryHandler... 
handlers) {
+        this.metadata = metadata;
+        this.handlers = handlers;
+    }
+
+    /**
+     * Strips the unit from a dimension description such as "100 pixels".
+     * Everything from the last occurrence of " pixels" onwards is dropped;
+     * a value without that unit is returned unchanged.
+     *
+     * @param s the description, may be null
+     * @return the description without the unit, or null if s was null
+     */
+    private static String trimPixels(String s) {
+        //if height/width appears as "100 pixels", trim " pixels"
+        if (s != null) {
+            int i = s.lastIndexOf(" pixels");
+            // lastIndexOf yields -1 when the unit is absent, and
+            // substring(0, -1) would throw StringIndexOutOfBoundsException
+            if (i > -1) {
+                s = s.substring(0, i);
+            }
+        }
+        return s;
+    }
+
+    /**
+     * Reads the metadata of a JPEG file and feeds it to the registered
+     * handlers.
+     *
+     * @param file the JPEG file to read
+     * @throws TikaException wrapping a JpegProcessingException or
+     *                       MetadataException raised while reading
+     */
+    public void parseJpeg(File file)
+            throws IOException, SAXException, TikaException {
+        final String failure = "Can't read JPEG metadata";
+        try {
+            handle(JpegMetadataReader.readMetadata(file));
+        } catch (JpegProcessingException e) {
+            throw new TikaException(failure, e);
+        } catch (MetadataException e) {
+            throw new TikaException(failure, e);
+        }
+    }
+
+    /**
+     * Reads the metadata of a TIFF file and feeds it to the registered
+     * handlers.
+     *
+     * @param file the TIFF file to read
+     * @throws TikaException wrapping a MetadataException or
+     *                       TiffProcessingException raised while reading
+     */
+    public void parseTiff(File file)
+            throws IOException, SAXException, TikaException {
+        final String failure = "Can't read TIFF metadata";
+        try {
+            handle(TiffMetadataReader.readMetadata(file));
+        } catch (MetadataException e) {
+            throw new TikaException(failure, e);
+        } catch (TiffProcessingException e) {
+            throw new TikaException(failure, e);
+        }
+    }
+
+    /**
+     * Reads the metadata of a WebP file and feeds it to the registered
+     * handlers.
+     *
+     * @param file the WebP file to read
+     * @throws IOException   passed through unchanged from the reader
+     * @throws TikaException wrapping a RiffProcessingException or
+     *                       MetadataException raised while reading
+     */
+    public void parseWebP(File file) throws IOException, TikaException {
+        // No placeholder Metadata object and no catch-and-rethrow of
+        // IOException: the former was overwritten at once, the latter
+        // changed nothing about how the exception propagates.
+        try {
+            com.drew.metadata.Metadata webPMetadata = WebpMetadataReader.readMetadata(file);
+            handle(webPMetadata);
+        } catch (RiffProcessingException e) {
+            throw new TikaException("Can't process Riff data", e);
+        } catch (MetadataException e) {
+            throw new TikaException("Can't process Riff data", e);
+        }
+    }
+
+    /**
+     * Reads {@code length} bytes of EXIF data from the stream and parses them.
+     * When {@code needsExifHeader} is set, the data is placed behind a
+     * six-byte prefix: the characters 'E' 'x' 'i' 'f' followed by two bytes
+     * left at their zero default.
+     *
+     * @param stream          source of the EXIF bytes
+     * @param length          number of bytes to read from the stream
+     * @param needsExifHeader whether the six-byte prefix has to be added
+     */
+    public void parseRawExif(InputStream stream, int length, boolean needsExifHeader)
+            throws IOException, SAXException, TikaException {
+        final int offset = needsExifHeader ? 6 : 0;
+        final byte[] buffer = new byte[length + offset];
+        if (needsExifHeader) {
+            buffer[0] = (byte) 'E';
+            buffer[1] = (byte) 'x';
+            buffer[2] = (byte) 'i';
+            buffer[3] = (byte) 'f';
+        }
+        IOUtils.readFully(stream, buffer, offset, length);
+        parseRawExif(buffer);
+    }
+
+    /**
+     * Parses an EXIF block held in memory and feeds the result to the
+     * registered handlers. Reading starts at the offset given by the length
+     * of {@code ExifReader.JPEG_SEGMENT_PREAMBLE}.
+     *
+     * @param exifData the EXIF bytes, including the leading preamble
+     * @throws TikaException wrapping a MetadataException from the handlers
+     */
+    public void parseRawExif(byte[] exifData)
+            throws IOException, SAXException, TikaException {
+        final com.drew.metadata.Metadata extracted = new com.drew.metadata.Metadata();
+        final int preambleLength = ExifReader.JPEG_SEGMENT_PREAMBLE.length();
+        new ExifReader().extract(new ByteArrayReader(exifData), extracted, preambleLength);
+
+        try {
+            handle(extracted);
+        } catch (MetadataException e) {
+            throw new TikaException("Can't process the EXIF Data", e);
+        }
+    }
+
+    /**
+     * Parses an XMP packet held in memory and feeds the result to the
+     * registered handlers.
+     *
+     * @param xmpData the raw XMP bytes
+     * @throws TikaException wrapping a MetadataException from the handlers
+     */
+    public void parseRawXMP(byte[] xmpData)
+            throws IOException, SAXException, TikaException {
+        final com.drew.metadata.Metadata extracted = new com.drew.metadata.Metadata();
+        new XmpReader().extract(xmpData, extracted);
+
+        try {
+            handle(extracted);
+        } catch (MetadataException e) {
+            throw new TikaException("Can't process the XMP Data", e);
+        }
+    }
+
+    /**
+     * Copies extracted tags to tika metadata using registered handlers.
+     * Delegates to {@link #handle(Iterator)} with an iterator over the
+     * directories of the given object.
+     *
+     * @param metadataExtractor Tag directories from a Metadata Extractor
+     *                          "reader"
+     * @throws MetadataException This method does not handle exceptions from
+     *                           Metadata Extractor
+     */
+    protected void handle(com.drew.metadata.Metadata metadataExtractor)
+            throws MetadataException {
+        handle(metadataExtractor.getDirectories().iterator());
+    }
+
+    /**
+     * Copies extracted tags to tika metadata using registered handlers.
+     * Each directory is offered to every handler, in registration order;
+     * only handlers that report support for the directory's class are run.
+     *
+     * @param directories Metadata Extractor {@link com.drew.metadata.Directory} instances.
+     * @throws MetadataException This method does not handle exceptions from
+     *                           Metadata Extractor
+     */
+    protected void handle(Iterator<Directory> directories) throws MetadataException {
+        while (directories.hasNext()) {
+            final Directory current = directories.next();
+            final Class<? extends Directory> type = current.getClass();
+            for (DirectoryHandler candidate : handlers) {
+                if (!candidate.supports(type)) {
+                    continue;
+                }
+                candidate.handle(current, metadata);
+            }
+        }
+    }
+
+    /**
+     * Reads one or more type of Metadata Extractor fields.
+     * The extractor calls {@code supports} with a directory's class first and
+     * calls {@code handle} only when that returned true.
+     */
+    static interface DirectoryHandler {
+        /**
+         * @param directoryType A Metadata Extractor directory class
+         * @return true if the directory type is supported by this handler
+         */
+        boolean supports(Class<? extends Directory> directoryType);
+
+        /**
+         * @param directory extracted tags
+         * @param metadata  current tika metadata
+         * @throws MetadataException typically field extraction error, aborts
+         *                           all further extraction
+         */
+        void handle(Directory directory, Metadata metadata)
+                throws MetadataException;
+    }
+
+    /**
+     * Mimics the behavior from TIKA-314 of copying all extracted tags
+     * to tika metadata using field names from Metadata Extractor.
+     * Accepts every directory type.
+     */
+    static class CopyAllFieldsHandler implements DirectoryHandler {
+        public boolean supports(Class<? extends Directory> directoryType) {
+            return true;
+        }
+
+        public void handle(Directory directory, Metadata metadata)
+                throws MetadataException {
+            if (directory.getTags() == null) {
+                return;
+            }
+            for (Tag entry : directory.getTags()) {
+                metadata.set(entry.getTagName(), entry.getDescription());
+            }
+        }
+    }
+
+    /**
+     * Copies all fields regardless of directory, if the tag name
+     * is not identical to a known Metadata field name.
+     * This leads to more predictable behavior than {@link CopyAllFieldsHandler}.
+     * Tags without a description are skipped, values are trimmed, and any
+     * spelling of "true" / "false" is normalised to lower case.
+     */
+    static class CopyUnknownFieldsHandler implements DirectoryHandler {
+        public boolean supports(Class<? extends Directory> directoryType) {
+            return true;
+        }
+
+        public void handle(Directory directory, Metadata metadata)
+                throws MetadataException {
+            if (directory.getTags() == null) {
+                return;
+            }
+            for (Tag entry : directory.getTags()) {
+                final String name = entry.getTagName();
+                if (MetadataFields.isMetadataField(name) || entry.getDescription() == null) {
+                    continue;
+                }
+                String value = entry.getDescription().trim();
+                final String canonicalTrue = Boolean.TRUE.toString();
+                final String canonicalFalse = Boolean.FALSE.toString();
+                if (canonicalTrue.equalsIgnoreCase(value)) {
+                    value = canonicalTrue;
+                } else if (canonicalFalse.equalsIgnoreCase(value)) {
+                    value = canonicalFalse;
+                }
+                metadata.set(name, value);
+            }
+        }
+    }
+
+    /**
+     * Basic image properties for TIFF and JPEG, at least.
+     * Copies width, height, bits per sample and samples per pixel, keeping
+     * only the leading digits of each value.
+     */
+    static class DimensionsHandler implements DirectoryHandler {
+        // Compiled once for the class rather than once per handler instance
+        private static final Pattern LEADING_NUMBERS = Pattern.compile("(\\d+)\\s*.*");
+
+        public boolean supports(Class<? extends Directory> directoryType) {
+            return directoryType == JpegDirectory.class ||
+                    directoryType == ExifSubIFDDirectory.class ||
+                    directoryType == ExifThumbnailDirectory.class ||
+                    directoryType == ExifIFD0Directory.class;
+        }
+
+        public void handle(Directory directory, Metadata metadata) throws MetadataException {
+            // The test TIFF has width and height stored as follows according to exiv2
+            //Exif.Image.ImageWidth                        Short       1  100
+            //Exif.Image.ImageLength                       Short       1  75
+            // and the values are found in "Thumbnail Image Width" (and Height)
+            // from Metadata Extractor
+            set(directory, metadata, JpegDirectory.TAG_IMAGE_WIDTH, Metadata.IMAGE_WIDTH);
+            set(directory, metadata, JpegDirectory.TAG_IMAGE_HEIGHT, Metadata.IMAGE_LENGTH);
+            // Bits per sample, two methods of extracting, exif overrides jpeg
+            set(directory, metadata, JpegDirectory.TAG_DATA_PRECISION, Metadata.BITS_PER_SAMPLE);
+            set(directory, metadata, ExifSubIFDDirectory.TAG_BITS_PER_SAMPLE,
+                    Metadata.BITS_PER_SAMPLE);
+            // Straightforward
+            set(directory, metadata, ExifSubIFDDirectory.TAG_SAMPLES_PER_PIXEL,
+                    Metadata.SAMPLES_PER_PIXEL);
+        }
+
+        /**
+         * Stores the leading run of digits of the tag's string value in the
+         * given metadata field; does nothing if the tag is absent or its
+         * value does not start with a digit.
+         */
+        private void set(Directory directory, Metadata metadata, int extractTag,
+                         Property metadataField) {
+            if (!directory.containsTag(extractTag)) {
+                return;
+            }
+            String raw = directory.getString(extractTag);
+            if (raw == null) {
+                // Pattern.matcher(null) would throw NullPointerException
+                return;
+            }
+            Matcher m = LEADING_NUMBERS.matcher(raw);
+            if (m.matches()) {
+                metadata.set(metadataField, m.group(1));
+            }
+        }
+    }
+
+    /**
+     * Adds the JPEG comment, when present, to the Tika comments property.
+     */
+    static class JpegCommentHandler implements DirectoryHandler {
+        public boolean supports(Class<? extends Directory> directoryType) {
+            return directoryType == JpegCommentDirectory.class;
+        }
+
+        public void handle(Directory directory, Metadata metadata) throws MetadataException {
+            if (!directory.containsTag(JpegCommentDirectory.TAG_COMMENT)) {
+                return;
+            }
+            final String comment = directory.getString(JpegCommentDirectory.TAG_COMMENT);
+            metadata.add(TikaCoreProperties.COMMENTS, comment);
+        }
+    }
+
+    static class ExifHandler implements DirectoryHandler {
+        // Formatter for ISO 8601 date-times without a time zone suffix.
+        // The field is static, so it is shared by every ExifHandler instance;
+        // what keeps it thread safe is the ThreadLocal, which hands each
+        // thread its own SimpleDateFormat (a class that is not thread safe).
+        private static final ThreadLocal<SimpleDateFormat> DATE_UNSPECIFIED_TZ 
= new ThreadLocal<SimpleDateFormat>() {
+            @Override
+            protected SimpleDateFormat initialValue() {
+                return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", 
Locale.US);
+            }
+        };
+
+        /**
+         * @return true only for the EXIF IFD0 and EXIF SubIFD directory classes
+         */
+        public boolean supports(Class<? extends Directory> directoryType) {
+            return directoryType == ExifIFD0Directory.class ||
+                    directoryType == ExifSubIFDDirectory.class;
+        }
+
+        /**
+         * Maps date, photo and comment tags of an EXIF directory, in that
+         * order. A failure while reading the dates does not stop the photo
+         * and comment tags from being mapped.
+         */
+        public void handle(Directory directory, Metadata metadata) {
+            // Only the date step is guarded. With all three calls inside the
+            // try block, a date error skipped the other tags instead of
+            // proceeding with them.
+            try {
+                handleDateTags(directory, metadata);
+            } catch (MetadataException ignored) {
+                // ignore date parse errors and proceed with other tags
+            }
+            handlePhotoTags(directory, metadata);
+            handleCommentTags(directory, metadata);
+        }
+
+        /**
+         * EXIF may contain image description, although with undefined encoding.
+         * Use IPTC for other annotation fields, and XMP for unicode support.
+         * The description is only set if the metadata has none yet.
+         */
+        public void handleCommentTags(Directory directory, Metadata metadata) {
+            if (metadata.get(TikaCoreProperties.DESCRIPTION) != null) {
+                return;
+            }
+            if (!directory.containsTag(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION)) {
+                return;
+            }
+            final String description =
+                    directory.getString(ExifIFD0Directory.TAG_IMAGE_DESCRIPTION);
+            metadata.set(TikaCoreProperties.DESCRIPTION, description);
+        }
+
+        /**
+         * Maps common TIFF and EXIF tags onto the Tika
+         * TIFF image metadata namespace.
+         * Tags that are absent from the directory are left alone.
+         */
+        public void handlePhotoTags(Directory directory, Metadata metadata) {
+            setRationalOrString(directory, metadata,
+                    ExifSubIFDDirectory.TAG_EXPOSURE_TIME, Metadata.EXPOSURE_TIME);
+
+            if (directory.containsTag(ExifSubIFDDirectory.TAG_FLASH)) {
+                String flash = directory.getDescription(ExifSubIFDDirectory.TAG_FLASH);
+                // The description used to be dereferenced unconditionally;
+                // skip the field rather than fail if none is available
+                if (flash != null) {
+                    if (flash.contains("Flash fired")) {
+                        metadata.set(Metadata.FLASH_FIRED, Boolean.TRUE.toString());
+                    } else if (flash.contains("Flash did not fire")) {
+                        metadata.set(Metadata.FLASH_FIRED, Boolean.FALSE.toString());
+                    } else {
+                        metadata.set(Metadata.FLASH_FIRED, flash);
+                    }
+                }
+            }
+
+            setRationalOrString(directory, metadata,
+                    ExifSubIFDDirectory.TAG_FNUMBER, Metadata.F_NUMBER);
+            setRationalOrString(directory, metadata,
+                    ExifSubIFDDirectory.TAG_FOCAL_LENGTH, Metadata.FOCAL_LENGTH);
+
+            if (directory.containsTag(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT)) {
+                metadata.set(Metadata.ISO_SPEED_RATINGS,
+                        directory.getString(ExifSubIFDDirectory.TAG_ISO_EQUIVALENT));
+            }
+
+            if (directory.containsTag(ExifIFD0Directory.TAG_MAKE)) {
+                metadata.set(Metadata.EQUIPMENT_MAKE,
+                        directory.getString(ExifIFD0Directory.TAG_MAKE));
+            }
+            if (directory.containsTag(ExifIFD0Directory.TAG_MODEL)) {
+                metadata.set(Metadata.EQUIPMENT_MODEL,
+                        directory.getString(ExifIFD0Directory.TAG_MODEL));
+            }
+
+            if (directory.containsTag(ExifIFD0Directory.TAG_ORIENTATION)) {
+                Object orientation = directory.getObject(ExifIFD0Directory.TAG_ORIENTATION);
+                if (orientation instanceof Integer) {
+                    metadata.set(Metadata.ORIENTATION, Integer.toString((Integer) orientation));
+                } else {
+                    metadata.set(Metadata.ORIENTATION,
+                            directory.getString(ExifIFD0Directory.TAG_ORIENTATION));
+                }
+            }
+
+            if (directory.containsTag(ExifIFD0Directory.TAG_SOFTWARE)) {
+                metadata.set(Metadata.SOFTWARE,
+                        directory.getString(ExifIFD0Directory.TAG_SOFTWARE));
+            }
+
+            setRationalOrString(directory, metadata,
+                    ExifIFD0Directory.TAG_X_RESOLUTION, Metadata.RESOLUTION_HORIZONTAL);
+            setRationalOrString(directory, metadata,
+                    ExifIFD0Directory.TAG_Y_RESOLUTION, Metadata.RESOLUTION_VERTICAL);
+
+            if (directory.containsTag(ExifIFD0Directory.TAG_RESOLUTION_UNIT)) {
+                metadata.set(Metadata.RESOLUTION_UNIT,
+                        directory.getDescription(ExifIFD0Directory.TAG_RESOLUTION_UNIT));
+            }
+            if (directory.containsTag(ExifThumbnailDirectory.TAG_IMAGE_WIDTH)) {
+                metadata.set(Metadata.IMAGE_WIDTH,
+                        trimPixels(directory.getDescription(
+                                ExifThumbnailDirectory.TAG_IMAGE_WIDTH)));
+            }
+            if (directory.containsTag(ExifThumbnailDirectory.TAG_IMAGE_HEIGHT)) {
+                metadata.set(Metadata.IMAGE_LENGTH,
+                        trimPixels(directory.getDescription(
+                                ExifThumbnailDirectory.TAG_IMAGE_HEIGHT)));
+            }
+        }
+
+        /**
+         * Copies one tag to a metadata property, if the directory has it:
+         * as a double when the stored object is a Rational, otherwise as
+         * the tag's string value.
+         */
+        private static void setRationalOrString(Directory directory, Metadata metadata,
+                                                int tag, Property property) {
+            if (!directory.containsTag(tag)) {
+                return;
+            }
+            Object value = directory.getObject(tag);
+            if (value instanceof Rational) {
+                metadata.set(property, ((Rational) value).doubleValue());
+            } else {
+                metadata.set(property, directory.getString(tag));
+            }
+        }
+
+        /**
+         * Maps exif dates to metadata fields.
+         * "Date/Time Original" sets the created and original-date fields;
+         * "Date/Time" sets the modified field and, if no created value exists
+         * by then, the created field as well.
+         */
+        public void handleDateTags(Directory directory, Metadata metadata)
+                throws MetadataException {
+            // Date/Time Original overrides value from ExifDirectory.TAG_DATETIME
+            if (directory.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)) {
+                final Date taken = directory.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL);
+                // Unless we have GPS time we don't know the time zone so date must be set
+                // as ISO 8601 datetime without timezone suffix (no Z or +/-)
+                if (taken != null) {
+                    // Same time zone as Metadata Extractor uses
+                    final String takenText = DATE_UNSPECIFIED_TZ.get().format(taken);
+                    metadata.set(TikaCoreProperties.CREATED, takenText);
+                    metadata.set(Metadata.ORIGINAL_DATE, takenText);
+                }
+            }
+
+            if (!directory.containsTag(ExifIFD0Directory.TAG_DATETIME)) {
+                return;
+            }
+            final Date changed = directory.getDate(ExifIFD0Directory.TAG_DATETIME);
+            if (changed == null) {
+                return;
+            }
+            final String changedText = DATE_UNSPECIFIED_TZ.get().format(changed);
+            metadata.set(TikaCoreProperties.MODIFIED, changedText);
+            // If Date/Time Original does not exist this might be creation date
+            if (metadata.get(TikaCoreProperties.CREATED) == null) {
+                metadata.set(TikaCoreProperties.CREATED, changedText);
+            }
+        }
+    }
+
+    /**
+     * Reads image comments, originally TIKA-472.
+     * Maps IPTC keywords, headline / object name, by-line and caption onto
+     * Tika properties.
+     * NOTE(review): this comment used to say that Metadata Extractor does not
+     * read XMP, yet this file uses its XmpReader - confirm whether that
+     * remark is still needed.
+     */
+    static class IptcHandler implements DirectoryHandler {
+        public boolean supports(Class<? extends Directory> directoryType) {
+            return directoryType == IptcDirectory.class;
+        }
+
+        public void handle(Directory directory, Metadata metadata)
+                throws MetadataException {
+            if (directory.containsTag(IptcDirectory.TAG_KEYWORDS)) {
+                String[] keywords = directory.getStringArray(IptcDirectory.TAG_KEYWORDS);
+                // Guard against a value that cannot be represented as an
+                // array; iterating over null would throw
+                if (keywords != null) {
+                    for (String k : keywords) {
+                        metadata.add(TikaCoreProperties.KEYWORDS, k);
+                    }
+                }
+            }
+            // The headline wins over the object name as title
+            if (directory.containsTag(IptcDirectory.TAG_HEADLINE)) {
+                metadata.set(TikaCoreProperties.TITLE,
+                        directory.getString(IptcDirectory.TAG_HEADLINE));
+            } else if (directory.containsTag(IptcDirectory.TAG_OBJECT_NAME)) {
+                metadata.set(TikaCoreProperties.TITLE,
+                        directory.getString(IptcDirectory.TAG_OBJECT_NAME));
+            }
+            if (directory.containsTag(IptcDirectory.TAG_BY_LINE)) {
+                metadata.set(TikaCoreProperties.CREATOR,
+                        directory.getString(IptcDirectory.TAG_BY_LINE));
+                metadata.set(IPTC.CREATOR,
+                        directory.getString(IptcDirectory.TAG_BY_LINE));
+            }
+            if (directory.containsTag(IptcDirectory.TAG_CAPTION)) {
+                metadata.set(TikaCoreProperties.DESCRIPTION,
+                        // Looks like metadata extractor returns IPTC newlines as a single
+                        // carriage return, but the exiv2 command does not so we change to
+                        // line feed here because that is less surprising to users
+                        directory.getString(IptcDirectory.TAG_CAPTION).replaceAll("\r\n?", "\n"));
+            }
+        }
+    }
+
+    /**
+     * Maps EXIF Geo Tags onto the Tika Geo metadata namespace.
+     * Latitude and longitude are formatted with GEO_DECIMAL_FORMAT_STRING
+     * using English decimal symbols.
+     */
+    static class GeotagHandler implements DirectoryHandler {
+        public boolean supports(Class<? extends Directory> directoryType) {
+            return directoryType == GpsDirectory.class;
+        }
+
+        public void handle(Directory directory, Metadata metadata) throws MetadataException {
+            final GeoLocation position = ((GpsDirectory) directory).getGeoLocation();
+            if (position == null) {
+                return;
+            }
+            final DecimalFormatSymbols symbols = new DecimalFormatSymbols(Locale.ENGLISH);
+            final DecimalFormat formatter = new DecimalFormat(GEO_DECIMAL_FORMAT_STRING, symbols);
+            metadata.set(TikaCoreProperties.LATITUDE, formatter.format(position.getLatitude()));
+            metadata.set(TikaCoreProperties.LONGITUDE, formatter.format(position.getLongitude()));
+        }
+    }
+
+}

Added: 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageParser.java?rev=1722029&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageParser.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/ImageParser.java
 Mon Dec 28 23:22:46 2015
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import javax.imageio.IIOException;
+import javax.imageio.ImageIO;
+import javax.imageio.ImageReader;
+import javax.imageio.metadata.IIOMetadata;
+import javax.imageio.stream.ImageInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser that uses the {@code javax.imageio} readers to extract the image
+ * dimensions and the reader-specific metadata tree for the media types listed
+ * in {@code SUPPORTED_TYPES}. No text is extracted; the XHTML output is an
+ * empty document.
+ */
+public class ImageParser extends AbstractParser {
+
+    /**
+     * Serial version UID
+     */
+    private static final long serialVersionUID = 7852529269245520335L;
+
+    private static final MediaType CANONICAL_BMP_TYPE = MediaType.image("x-ms-bmp");
+    private static final MediaType JAVA_BMP_TYPE = MediaType.image("bmp");
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+            Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+                    CANONICAL_BMP_TYPE,
+                    JAVA_BMP_TYPE,
+                    MediaType.image("gif"),
+                    MediaType.image("png"),
+                    MediaType.image("vnd.wap.wbmp"),
+                    MediaType.image("x-icon"),
+                    MediaType.image("x-xcf"))));
+
+    /**
+     * Copies the value stored under an ImageIO-derived key to a Tika key,
+     * if such a value exists.
+     *
+     * @param metadata   the metadata to read from and write to
+     * @param imageIOkey the key the value was stored under by {@link #loadNode}
+     * @param tikaKey    the key to copy the value to
+     */
+    private static void setIfPresent(Metadata metadata, String imageIOkey, String tikaKey) {
+        String value = metadata.get(imageIOkey);
+        if (value != null) {
+            metadata.set(tikaKey, value);
+        }
+    }
+
+    /**
+     * Copies the value stored under an ImageIO-derived key to a Tika property,
+     * if such a value exists. A single trailing space is dropped from the value.
+     *
+     * @param metadata   the metadata to read from and write to
+     * @param imageIOkey the key the value was stored under by {@link #loadNode}
+     * @param tikaProp   the property to copy the value to
+     */
+    private static void setIfPresent(Metadata metadata, String imageIOkey, Property tikaProp) {
+        String v = metadata.get(imageIOkey);
+        if (v != null) {
+            if (v.endsWith(" ")) {
+                v = v.substring(0, v.lastIndexOf(' '));
+            }
+            metadata.set(tikaProp, v);
+        }
+    }
+
+    /**
+     * Walks every metadata format tree offered by the ImageIO metadata object
+     * and records its attributes in the Tika metadata.
+     *
+     * @param imageMetadata the ImageIO metadata; may be <code>null</code>, as
+     *                      {@link ImageReader#getImageMetadata(int)} is allowed
+     *                      to return <code>null</code> when no metadata is available
+     * @param metadata      the Tika metadata to populate
+     */
+    private static void loadMetadata(IIOMetadata imageMetadata, Metadata metadata) {
+        if (imageMetadata == null) {
+            // The reader has no metadata for this image, so there is nothing to load
+            return;
+        }
+        String[] names = imageMetadata.getMetadataFormatNames();
+        if (names == null) {
+            return;
+        }
+        for (String name : names) {
+            // The root node name is not included in the generated keys
+            loadNode(metadata, imageMetadata.getAsTree(name), "", false);
+        }
+    }
+
+    /**
+     * Recursively records the attributes of a metadata tree node. The key is
+     * the space separated list of node names leading to the node. A node with
+     * a single attribute contributes just that attribute's value; a node with
+     * several attributes contributes a "name=value, name=value" list.
+     *
+     * @param metadata        the Tika metadata to populate
+     * @param node            the node to process
+     * @param parents         the space separated names of the ancestor nodes
+     * @param addThisNodeName whether the name of this node is appended to the key
+     */
+    private static void loadNode(
+            Metadata metadata, Node node, String parents,
+            boolean addThisNodeName) {
+        if (addThisNodeName) {
+            if (parents.length() > 0) {
+                parents += " ";
+            }
+            parents += node.getNodeName();
+        }
+        NamedNodeMap map = node.getAttributes();
+        if (map != null) {
+
+            int length = map.getLength();
+            if (length == 1) {
+                metadata.add(parents, normalize(map.item(0).getNodeValue()));
+            } else if (length > 1) {
+                StringBuilder value = new StringBuilder();
+                for (int i = 0; i < length; i++) {
+                    if (i > 0) {
+                        value.append(", ");
+                    }
+                    Node attr = map.item(i);
+                    value.append(attr.getNodeName());
+                    value.append("=");
+                    value.append(normalize(attr.getNodeValue()));
+                }
+                metadata.add(parents, value.toString());
+            }
+        }
+
+        Node child = node.getFirstChild();
+        while (child != null) {
+            // Process the children recursively, including their names in the key
+            loadNode(metadata, child, parents, true);
+            child = child.getNextSibling();
+        }
+    }
+
+    /**
+     * Trims an attribute value, maps <code>null</code> to the empty string and
+     * lower-cases any spelling of "true" / "false".
+     *
+     * @param value the raw attribute value, may be <code>null</code>
+     * @return the normalized value, never <code>null</code>
+     */
+    private static String normalize(String value) {
+        if (value != null) {
+            value = value.trim();
+        } else {
+            value = "";
+        }
+        if (Boolean.TRUE.toString().equalsIgnoreCase(value)) {
+            return Boolean.TRUE.toString();
+        } else if (Boolean.FALSE.toString().equalsIgnoreCase(value)) {
+            return Boolean.FALSE.toString();
+        }
+        return value;
+    }
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Reads the dimensions and metadata of the first image in the stream using
+     * the first ImageIO reader registered for the content type found in the
+     * metadata, then emits an empty XHTML document.
+     *
+     * @throws TikaException if ImageIO fails to read the image, other than for
+     *                       the known GIF problem described in TIKA-619
+     */
+    @Override
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        String type = metadata.get(Metadata.CONTENT_TYPE);
+        if (type != null) {
+            // Java has a different idea of the BMP mime type to
+            //  what the canonical one is, fix this up.
+            if (CANONICAL_BMP_TYPE.toString().equals(type)) {
+                type = JAVA_BMP_TYPE.toString();
+            }
+
+            try {
+                Iterator<ImageReader> iterator =
+                        ImageIO.getImageReadersByMIMEType(type);
+                if (iterator.hasNext()) {
+                    ImageReader reader = iterator.next();
+                    try {
+                        // Shield the caller's stream, so that closing the
+                        //  ImageInputStream does not close it as well
+                        try (ImageInputStream imageStream = ImageIO.createImageInputStream(
+                                new CloseShieldInputStream(stream))) {
+                            reader.setInput(imageStream);
+
+                            String width = Integer.toString(reader.getWidth(0));
+                            String height = Integer.toString(reader.getHeight(0));
+                            metadata.set(Metadata.IMAGE_WIDTH, width);
+                            metadata.set(Metadata.IMAGE_LENGTH, height);
+                            metadata.set("height", height);
+                            metadata.set("width", width);
+
+                            loadMetadata(reader.getImageMetadata(0), metadata);
+                        }
+                    } finally {
+                        reader.dispose();
+                    }
+                }
+
+                // Translate certain Metadata tags from the ImageIO
+                //  specific namespace into the general Tika one
+                setIfPresent(metadata, "CommentExtensions CommentExtension", TikaCoreProperties.COMMENTS);
+                setIfPresent(metadata, "markerSequence com", TikaCoreProperties.COMMENTS);
+                setIfPresent(metadata, "Data BitsPerSample", Metadata.BITS_PER_SAMPLE);
+            } catch (IIOException e) {
+                // TIKA-619: There is a known bug in the Sun API when dealing with GIF images
+                //  which Tika will just ignore.
+                if (!(e.getMessage() != null &&
+                        e.getMessage().equals("Unexpected block type 0!") &&
+                        type.equals("image/gif"))) {
+                    throw new TikaException(type + " parse error", e);
+                }
+            }
+        }
+
+        // There is no text to report, only metadata
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+}

Added: 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/MetadataFields.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/MetadataFields.java?rev=1722029&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/MetadataFields.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/MetadataFields.java
 Mon Dec 28 23:22:46 2015
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.util.HashSet;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
+
+/**
+ * Knows about all declared {@link Metadata} fields.
+ * Didn't find this functionality anywhere so it was added for
+ * ImageMetadataExtractor, but it can be generalized.
+ */
+public abstract class MetadataFields {
+
+    // Names collected from the public constants of the classes scanned below
+    private static final HashSet<String> known = new HashSet<String>();
+
+    static {
+        setKnownForClass(TikaCoreProperties.class);
+        setKnownForClass(Metadata.class);
+    }
+
+    /**
+     * Records the value of every public static final String field, and the
+     * name of every public static final Property field, of the given class.
+     */
+    private static void setKnownForClass(Class<?> clazz) {
+        for (Field field : clazz.getFields()) {
+            int modifiers = field.getModifiers();
+            boolean constant = Modifier.isPublic(modifiers)
+                    && Modifier.isStatic(modifiers)
+                    && Modifier.isFinal(modifiers);
+            if (!constant) {
+                continue;
+            }
+
+            Class<?> fieldType = field.getType();
+            boolean holdsString = String.class.equals(fieldType);
+            boolean holdsProperty = Property.class.isAssignableFrom(fieldType);
+            if (!holdsString && !holdsProperty) {
+                continue;
+            }
+
+            try {
+                // Static field, so no instance is needed to read it
+                Object value = field.get(null);
+                if (value == null) {
+                    continue;
+                }
+                if (holdsString) {
+                    known.add((String) value);
+                } else {
+                    known.add(((Property) value).getName());
+                }
+            } catch (IllegalArgumentException | IllegalAccessException e) {
+                e.printStackTrace();
+            }
+        }
+    }
+
+    /**
+     * @return true if the name matches one of the collected field names
+     */
+    public static boolean isMetadataField(String name) {
+        return known.contains(name);
+    }
+
+    /**
+     * @return true if the property's name matches one of the collected field names
+     */
+    public static boolean isMetadataField(Property property) {
+        String propertyName = property.getName();
+        return known.contains(propertyName);
+    }
+
+}

Added: 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/PSDParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/PSDParser.java?rev=1722029&view=auto
==============================================================================
--- 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/PSDParser.java
 (added)
+++ 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/PSDParser.java
 Mon Dec 28 23:22:46 2015
@@ -0,0 +1,200 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.EndianUtils;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Photoshop;
+import org.apache.tika.metadata.TIFF;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import static java.nio.charset.StandardCharsets.US_ASCII;
+
+/**
+ * Parser for the Adobe Photoshop PSD File Format.
+ * <p/>
+ * Documentation on the file format is available from
+ * 
http://www.adobe.com/devnet-apps/photoshop/fileformatashtml/PhotoshopFileFormats.htm
+ */
+public class PSDParser extends AbstractParser {
+
+    /**
+     * Serial version UID
+     */
+    private static final long serialVersionUID = 883387734607994914L;
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+            Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+                    MediaType.image("vnd.adobe.photoshop"))));
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Reads the file header and the Image Resources section, recording the
+     * image dimensions, bit depth, colour mode and any caption as metadata.
+     * The XHTML output is an empty document.
+     *
+     * @throws TikaException if the signature, version, a section length or an
+     *                       Image Resource Block is invalid
+     * @throws IOException   if the stream ends early or cannot be read
+     */
+    @Override
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        // Check for the magic header signature
+        byte[] signature = new byte[4];
+        IOUtils.readFully(stream, signature);
+        boolean signatureFound =
+                signature[0] == (byte) '8' && signature[1] == (byte) 'B' &&
+                signature[2] == (byte) 'P' && signature[3] == (byte) 'S';
+        if (!signatureFound) {
+            throw new TikaException("PSD/PSB magic signature invalid");
+        }
+
+        // Check the version, we support 1 and 2
+        int version = EndianUtils.readUShortBE(stream);
+        if (version != 1 && version != 2) {
+            throw new TikaException("Invalid PSD/PSB version " + version);
+        }
+
+        // Skip the reserved block
+        IOUtils.readFully(stream, new byte[6]);
+
+        // Number of channels in the image, read only to advance the stream
+        // TODO Identify a suitable metadata key for this
+        EndianUtils.readUShortBE(stream);
+
+        // Width and Height
+        int height = EndianUtils.readIntBE(stream);
+        int width = EndianUtils.readIntBE(stream);
+        metadata.set(TIFF.IMAGE_LENGTH, height);
+        metadata.set(TIFF.IMAGE_WIDTH, width);
+
+        // Depth (bits per channel)
+        int depth = EndianUtils.readUShortBE(stream);
+        metadata.set(TIFF.BITS_PER_SAMPLE, Integer.toString(depth));
+
+        // Colour mode, eg Bitmap or RGB
+        // The value comes straight from the file, so only use it as an
+        //  index if it is one that we know about
+        int colorMode = EndianUtils.readUShortBE(stream);
+        if (colorMode >= 0 && colorMode < Photoshop._COLOR_MODE_CHOICES_INDEXED.length) {
+            metadata.set(Photoshop.COLOR_MODE, Photoshop._COLOR_MODE_CHOICES_INDEXED[colorMode]);
+        }
+
+        // Next is the Color Mode section
+        // We don't care about this bit
+        long colorModeSectionSize = EndianUtils.readIntBE(stream);
+        if (colorModeSectionSize < 0) {
+            throw new TikaException("Invalid Color Mode section size " + colorModeSectionSize);
+        }
+        skipFully(stream, colorModeSectionSize);
+
+        // Next is the Image Resources section
+        // Check for certain interesting keys here
+        long imageResourcesSectionSize = EndianUtils.readIntBE(stream);
+        long read = 0;
+        while (read < imageResourcesSectionSize) {
+            ResourceBlock rb = new ResourceBlock(stream);
+            read += rb.totalLength;
+
+            // Is it one we can do something useful with?
+            if (rb.id == ResourceBlock.ID_CAPTION) {
+                metadata.add(TikaCoreProperties.DESCRIPTION, rb.getDataAsString());
+            } else if (rb.id == ResourceBlock.ID_EXIF_1) {
+                // TODO Parse the EXIF info via ImageMetadataExtractor
+            } else if (rb.id == ResourceBlock.ID_EXIF_3) {
+                // TODO Parse the EXIF info via ImageMetadataExtractor
+            } else if (rb.id == ResourceBlock.ID_XMP) {
+                // TODO Parse the XMP info via ImageMetadataExtractor
+            }
+        }
+
+        // Next is the Layer and Mask Info
+        // Finally we have Image Data
+        // We can't do anything with these parts
+
+        // We don't have any helpful text, sorry...
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+    /**
+     * Skips exactly the requested number of bytes. A single call to
+     * {@link InputStream#skip(long)} is allowed to skip fewer bytes than
+     * asked for, which would leave the parser misaligned.
+     *
+     * @throws IOException if the stream ends before all the bytes are skipped
+     */
+    private static void skipFully(InputStream stream, long count) throws IOException {
+        long remaining = count;
+        while (remaining > 0) {
+            long skipped = stream.skip(remaining);
+            if (skipped > 0) {
+                remaining -= skipped;
+            } else if (stream.read() < 0) {
+                // skip() made no progress and read() confirms the end of the stream
+                throw new IOException("Unexpected end of stream with " +
+                        remaining + " bytes left to skip");
+            } else {
+                // skip() made no progress, but a byte could still be read
+                remaining--;
+            }
+        }
+    }
+
+    /**
+     * A single block from the Image Resources section: signature, id,
+     * name, and even-padded data.
+     */
+    private static class ResourceBlock {
+        private static final long SIGNATURE = 0x3842494d; // 8BIM
+        private static final int ID_CAPTION = 0x03F0;
+        private static final int ID_URL = 0x040B;
+        private static final int ID_EXIF_1 = 0x0422;
+        private static final int ID_EXIF_3 = 0x0423;
+        private static final int ID_XMP = 0x0424;
+
+        private int id;
+        private String name;
+        private byte[] data;
+        // Number of bytes consumed from the stream for this block
+        private int totalLength;
+
+        /**
+         * Reads one block from the stream.
+         *
+         * @throws TikaException if the block signature or data length is invalid
+         * @throws IOException   if the stream ends inside the block name
+         */
+        private ResourceBlock(InputStream stream) throws IOException, TikaException {
+            // Verify the signature
+            long sig = EndianUtils.readIntBE(stream);
+            if (sig != SIGNATURE) {
+                throw new TikaException("Invalid Image Resource Block Signature Found, got " +
+                        sig + " 0x" + Long.toHexString(sig) + " but the spec defines " + SIGNATURE);
+            }
+
+            // Read the block
+            id = EndianUtils.readUShortBE(stream);
+
+            StringBuilder nameB = new StringBuilder();
+            int nameLen = 0;
+            while (true) {
+                int v = stream.read();
+                if (v < 0) {
+                    // Without this check a truncated file would loop forever
+                    throw new IOException(
+                            "Unexpected end of stream in Image Resource Block name");
+                }
+                nameLen++;
+
+                if (v == 0) {
+                    // The name length is padded to be even
+                    if (nameLen % 2 == 1) {
+                        stream.read();
+                        nameLen++;
+                    }
+                    break;
+                }
+                nameB.append((char) v);
+            }
+            // Assigned after the loop, so that an empty name is "" rather than null
+            name = nameB.toString();
+
+            int dataLen = EndianUtils.readIntBE(stream);
+            // Data Length is even padded
+            int paddedLen = dataLen + (dataLen & 1);
+            if (dataLen < 0 || paddedLen < 0) {
+                // Negative in the file, or overflowed by the padding
+                throw new TikaException("Invalid Image Resource Block data length " + dataLen);
+            }
+            dataLen = paddedLen;
+            totalLength = 4 + 2 + nameLen + 4 + dataLen;
+
+            data = new byte[dataLen];
+            IOUtils.readFully(stream, data);
+        }
+
+        /**
+         * @return the data, less its final byte, decoded as US-ASCII;
+         *         the empty string if the block has no data
+         */
+        private String getDataAsString() {
+            if (data.length == 0) {
+                return "";
+            }
+            // Will be null padded
+            return new String(data, 0, data.length - 1, US_ASCII);
+        }
+    }
+}


Reply via email to