Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/TiffParser.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/TiffParser.java?rev=1722029&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/TiffParser.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/TiffParser.java Mon Dec 28 23:22:46 2015 @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.parser.image; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.Set; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TemporaryResources; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.AbstractParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.image.xmp.JempboxExtractor; +import org.apache.tika.sax.XHTMLContentHandler; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +public class TiffParser extends AbstractParser { + + /** + * Serial version UID + */ + private static final long serialVersionUID = -3941143576535464926L; + + private static final Set<MediaType> SUPPORTED_TYPES = + Collections.singleton(MediaType.image("tiff")); + + public Set<MediaType> getSupportedTypes(ParseContext context) { + return SUPPORTED_TYPES; + } + + public void parse( + InputStream stream, ContentHandler handler, + Metadata metadata, ParseContext context) + throws IOException, SAXException, TikaException { + TemporaryResources tmp = new TemporaryResources(); + try { + TikaInputStream tis = TikaInputStream.get(stream, tmp); + new ImageMetadataExtractor(metadata).parseTiff(tis.getFile()); + new JempboxExtractor(metadata).parse(tis); + } finally { + tmp.dispose(); + } + + XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); + xhtml.startDocument(); + xhtml.endDocument(); + } + +}
Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/WebPParser.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/WebPParser.java?rev=1722029&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/WebPParser.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/WebPParser.java Mon Dec 28 23:22:46 2015 @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.parser.image; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.Set; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TemporaryResources; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.AbstractParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.sax.XHTMLContentHandler; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + + +public class WebPParser extends AbstractParser { + + /** + * Serial version UID + */ + private static final long serialVersionUID = -3941143576535464926L; + + private static final Set<MediaType> SUPPORTED_TYPES = + Collections.singleton(MediaType.image("webp")); + + public Set<MediaType> getSupportedTypes(ParseContext context) { + return SUPPORTED_TYPES; + } + + public void parse( + InputStream stream, ContentHandler handler, + Metadata metadata, ParseContext context) + throws IOException, SAXException, TikaException { + TemporaryResources tmp = new TemporaryResources(); + try { + TikaInputStream tis = TikaInputStream.get(stream, tmp); + new ImageMetadataExtractor(metadata).parseWebP(tis.getFile()); + } finally { + tmp.dispose(); + } + + XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); + xhtml.startDocument(); + xhtml.endDocument(); + } +} Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java?rev=1722029&view=auto ============================================================================== --- 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java Mon Dec 28 23:22:46 2015 @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.parser.image.xmp; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.util.List; + +import org.apache.jempbox.xmp.XMPMetadata; +import org.apache.jempbox.xmp.XMPSchemaDublinCore; +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.TikaCoreProperties; +import org.xml.sax.InputSource; + +import static java.nio.charset.StandardCharsets.UTF_8; + +public class JempboxExtractor { + + // The XMP spec says it must be unicode, but for most file formats it specifies "must be encoded in UTF-8" + private static final String DEFAULT_XMP_CHARSET = UTF_8.name(); + private XMPPacketScanner scanner = new XMPPacketScanner(); + private Metadata metadata; + + public JempboxExtractor(Metadata metadata) { + this.metadata = metadata; + } + + public void parse(InputStream file) throws IOException, TikaException { + ByteArrayOutputStream xmpraw = new ByteArrayOutputStream(); + if (!scanner.parse(file, xmpraw)) { + return; + } + + Reader decoded = new InputStreamReader( + new ByteArrayInputStream(xmpraw.toByteArray()), + DEFAULT_XMP_CHARSET); + try { + XMPMetadata xmp = XMPMetadata.load(new InputSource(decoded)); + XMPSchemaDublinCore dc = xmp.getDublinCoreSchema(); + if (dc != null) { + if (dc.getTitle() != null) { + metadata.set(TikaCoreProperties.TITLE, dc.getTitle()); + } + if (dc.getDescription() != null) { + metadata.set(TikaCoreProperties.DESCRIPTION, dc.getDescription()); + } + if (dc.getCreators() != null && dc.getCreators().size() > 0) { + metadata.set(TikaCoreProperties.CREATOR, joinCreators(dc.getCreators())); + } + if (dc.getSubjects() != null && dc.getSubjects().size() > 0) { + for (String keyword : dc.getSubjects()) { + metadata.add(TikaCoreProperties.KEYWORDS, keyword); + } + // TODO should we set KEYWORDS too? 
+ // All tested photo managers set the same in Iptc.Application2.Keywords and Xmp.dc.subject + } + } + } catch (IOException e) { + // Could not parse embedded XMP metadata. That's not a serious + // problem, so we'll just ignore the issue for now. + // TODO: Make error handling like this configurable. + } + } + + protected String joinCreators(List<String> creators) { + if (creators == null || creators.size() == 0) { + return ""; + } + if (creators.size() == 1) { + return creators.get(0); + } + StringBuffer c = new StringBuffer(); + for (String s : creators) { + c.append(", ").append(s); + } + return c.substring(2); + } +} Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java?rev=1722029&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/image/xmp/XMPPacketScanner.java Mon Dec 28 23:22:46 2015 @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
/**
 * This class is a scanner for XMP packets. It locates the first XMP packet
 * in a stream and copies the packet body to an output stream.
 * <p/>
 * Important: Before you use this class to look for an XMP packet in some random file, please read
 * the chapter on "Scanning Files for XMP Packets" in the XMP specification!
 * <p/>
 * This class was branched from http://xmlgraphics.apache.org/ XMPPacketParser.
 * See also org.semanticdesktop.aperture.extractor.xmp.XMPExtractor, a variant.
 */
public class XMPPacketScanner {

    private static final byte[] PACKET_HEADER;
    private static final byte[] PACKET_HEADER_END;
    private static final byte[] PACKET_TRAILER;

    static {
        PACKET_HEADER = "<?xpacket begin=".getBytes(US_ASCII);
        PACKET_HEADER_END = "?>".getBytes(US_ASCII);
        PACKET_TRAILER = "<?xpacket".getBytes(US_ASCII);
    }

    /**
     * Consumes the stream up to and including the first occurrence of
     * {@code match}, discarding everything read.
     */
    private static boolean skipAfter(InputStream in, byte[] match) throws IOException {
        return skipAfter(in, match, null);
    }

    /**
     * Consumes the stream up to and including the first occurrence of
     * {@code match}. Bytes that precede the occurrence are copied to
     * {@code out} when it is not {@code null}; the occurrence itself is not.
     * <p>
     * On a mismatch the scan does not simply restart from zero: the bytes
     * matched so far plus the current byte may themselves end with a prefix
     * of {@code match} (for example the second {@code '<'} in
     * {@code "<<?xpacket"}), and that overlap is kept.
     *
     * @return {@code true} if {@code match} was found, {@code false} if the
     *         stream ended first (a partial match pending at end of stream is
     *         not written to {@code out})
     */
    private static boolean skipAfter(InputStream in, byte[] match, OutputStream out)
            throws IOException {
        int found = 0;
        int len = match.length;
        int b;
        while ((b = in.read()) >= 0) {
            if (b == (match[found] & 0xff)) {
                found++;
                if (found == len) {
                    return true;
                }
            } else {
                int keep = (found == 0) ? 0 : restartLength(match, found, b);
                if (out != null) {
                    // The candidate window is match[0..found) followed by b;
                    // everything except its last 'keep' bytes is now known
                    // not to belong to an occurrence and is flushed.
                    int flush = found + 1 - keep;
                    int fromPrefix = Math.min(flush, found);
                    if (fromPrefix > 0) {
                        out.write(match, 0, fromPrefix);
                    }
                    if (flush > found) {
                        out.write(b);
                    }
                }
                found = keep;
            }
        }
        return false;
    }

    /**
     * Returns the length of the longest suffix of the window
     * {@code match[0..found) + b} that is also a prefix of {@code match}.
     * Called only after {@code b} failed to extend the current match, so the
     * result is at most {@code found}.
     */
    private static int restartLength(byte[] match, int found, int b) {
        for (int k = found; k > 0; k--) {
            if ((match[k - 1] & 0xff) != b) {
                continue;
            }
            int start = found + 1 - k; // window index where the suffix begins
            boolean same = true;
            for (int i = 0; i < k - 1 && same; i++) {
                same = match[start + i] == match[i];
            }
            if (same) {
                return k;
            }
        }
        return 0;
    }

    /**
     * Locates the first XMP packet in a stream and writes its content (the
     * bytes between the end of the {@code <?xpacket begin=...?>} header and
     * the start of the {@code <?xpacket} trailer) to {@code xmlOut}. This
     * method only finds the first XMP packet in a stream, and it cannot
     * determine whether it has found the right XMP packet if there are
     * multiple packets.
     * <p/>
     * Does <em>not</em> close the stream. Note that a stream which does not
     * support mark is wrapped in a {@link java.io.BufferedInputStream} here,
     * which may read ahead of the packet on the underlying stream.
     *
     * @param in     the InputStream to search
     * @param xmlOut to write the XMP packet to
     * @return true if XMP packet is found, false if the stream ends before a
     *         packet header is seen
     * @throws IOException if an I/O error occurs, or if a packet header is
     *                     found but the header or the packet is not terminated
     */
    public boolean parse(InputStream in, OutputStream xmlOut) throws IOException {
        if (!in.markSupported()) {
            in = new java.io.BufferedInputStream(in);
        }
        boolean foundXMP = skipAfter(in, PACKET_HEADER);
        if (!foundXMP) {
            return false;
        }
        //TODO Inspect "begin" attribute!
        if (!skipAfter(in, PACKET_HEADER_END)) {
            throw new IOException("Invalid XMP packet header!");
        }
        //TODO Do with TeeInputStream when Commons IO 1.4 is available
        if (!skipAfter(in, PACKET_TRAILER, xmlOut)) {
            throw new IOException("XMP packet not properly terminated!");
        }
        return true;
    }

}
+ */ +package org.apache.tika.parser.jpeg; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.Set; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.io.TemporaryResources; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.AbstractParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.image.ImageMetadataExtractor; +import org.apache.tika.parser.image.xmp.JempboxExtractor; +import org.apache.tika.sax.XHTMLContentHandler; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +public class JpegParser extends AbstractParser { + + /** + * Serial version UID + */ + private static final long serialVersionUID = -1355028253756234603L; + + private static final Set<MediaType> SUPPORTED_TYPES = + Collections.singleton(MediaType.image("jpeg")); + + public Set<MediaType> getSupportedTypes(ParseContext context) { + return SUPPORTED_TYPES; + } + + public void parse( + InputStream stream, ContentHandler handler, + Metadata metadata, ParseContext context) + throws IOException, SAXException, TikaException { + TemporaryResources tmp = new TemporaryResources(); + try { + TikaInputStream tis = TikaInputStream.get(stream, tmp); + new ImageMetadataExtractor(metadata).parseJpeg(tis.getFile()); + new JempboxExtractor(metadata).parse(tis); + } finally { + tmp.dispose(); + } + + XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); + xhtml.startDocument(); + xhtml.endDocument(); + } + +} Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java?rev=1722029&view=auto 
============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/AudioFrame.java Mon Dec 28 23:22:46 2015 @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.mp3; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.tika.exception.TikaException; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +/** + * An Audio Frame in an MP3 file. These come after the ID3v2 tags in the file. + * Currently, only the header is processed, not the raw audio data. + */ +public class AudioFrame implements MP3Frame { + /** Constant for the MPEG version 1. */ + public static final int MPEG_V1 = 3; + + /** Constant for the MPEG version 2. */ + public static final int MPEG_V2 = 2; + + /** Constant for the MPEG version 2.5. */ + public static final int MPEG_V2_5 = 0; + + /** Constant for audio layer 1. */ + public static final int LAYER_1 = 3; + + /** Constant for audio layer 2. 
*/ + public static final int LAYER_2 = 2; + + /** Constant for audio layer 3. */ + public static final int LAYER_3 = 1; + + private final String version; + private final int versionCode; + private final int layer; + private final int sampleRate; + private final int channels; + private final int bitRate; + private final int length; + private final float duration; + + public String getVersion() { + return version; + } + + /** + * Get the sampling rate, in Hz + */ + public int getSampleRate() { + return sampleRate; + } + + /** + * Get the number of channels (1=mono, 2=stereo) + */ + public int getChannels() { + return channels; + } + + /** + * Get the version code. + * @return the version code (one of the {@code MPEG} constants) + */ + public int getVersionCode() + { + return versionCode; + } + + /** + * Get the audio layer code. + * @return the audio layer (one of the {@code LAYER} constants) + */ + public int getLayer() + { + return layer; + } + + /** + * Get the bit rate in bit per second. + * @return the bit rate + */ + public int getBitRate() + { + return bitRate; + } + + /** + * Returns the frame length in bytes. + * @return the frame length + */ + public int getLength() + { + return length; + } + + /** + * Returns the duration in milliseconds. + * @return the duration + */ + public float getDuration() + { + return duration; + } + + /** + * Does this appear to be a 4 byte audio frame header? + */ + public static boolean isAudioHeader(int h1, int h2, int h3, int h4) { + if (h1 == -1 || h2 == -1 || h3 == -1 || h4 == -1) { + return false; + } + // Check for the magic 11 bits set at the start + // Note - doesn't do a CRC check + if (h1 == 0xff && (h2 & 0x60) == 0x60) { + return true; + } + return false; + } + + /** + * @deprecated Use the constructor which is passed all values directly. 
+ */ + @Deprecated + public AudioFrame(InputStream stream, ContentHandler handler) + throws IOException, SAXException, TikaException { + this(-2, -2, -2, -2, stream); + } + + /** + * @deprecated Use the constructor which is passed all values directly. + */ + @Deprecated + public AudioFrame(int h1, int h2, int h3, int h4, InputStream in) + throws IOException { + if (h1 == -2 && h2 == -2 && h3 == -2 && h4 == -2) { + h1 = in.read(); + h2 = in.read(); + h3 = in.read(); + h4 = in.read(); + } + + if (isAudioHeader(h1, h2, h3, h4)) { + layer = (h2 >> 1) & 0x03; + versionCode = (h2 >> 3) & 0x03; + version = generateVersionStr(versionCode, layer); + + int rateCode = (h3 >> 2) & 0x03; + int rate; + switch (rateCode) { + case 0: + rate = 11025; + break; + case 1: + rate = 12000; + break; + default: + rate = 8000; + } + if (versionCode == MPEG_V2) { + rate *= 2; + } else if(versionCode == MPEG_V1) { + rate *= 4; + } + sampleRate = rate; + + int chans = h4 & 0x192; + if (chans < 3) { + // Stereo, joint stereo, dual channel + channels = 2; + } else { + channels = 1; + } + bitRate = 0; + duration = 0; + length = 0; + } else { + throw new IllegalArgumentException("Magic Audio Frame Header not found"); + } + } + + /** + * + * Creates a new instance of {@code AudioFrame} and initializes all properties. 
+ * @param mpegVersion the code for the MPEG version + * @param layer the code for the layer + * @param bitRate the bit rate (in bps) + * @param sampleRate the sample rate (in samples per second) + * @param channels the number of channels + * @param length the frame length (in bytes) + * @param duration the duration of this frame (in milliseconds) + */ + public AudioFrame(int mpegVersion, int layer, int bitRate, int sampleRate, + int channels, int length, float duration) { + versionCode = mpegVersion; + this.layer = layer; + this.bitRate = bitRate; + this.sampleRate = sampleRate; + this.channels = channels; + this.length = length; + this.duration = duration; + version = generateVersionStr(mpegVersion, layer); + } + + /** + * Generates a string for the version of this audio frame. + * @param version the code for the MPEG version + * @param layer the code for the layer + * @return a string for the version + */ + private static String generateVersionStr(int version, int layer) { + StringBuilder buf = new StringBuilder(64); + buf.append("MPEG 3 Layer "); + if (layer == LAYER_3) { + buf.append("III"); + } else if (layer == LAYER_2) { + buf.append("II"); + } else if (layer == LAYER_1) { + buf.append("I"); + } else { + buf.append("(reserved)"); + } + + buf.append(" Version "); + if (version == MPEG_V2_5) { + buf.append("2.5"); + } else if(version == MPEG_V2) { + buf.append("2"); + } else if(version == MPEG_V1) { + buf.append("1"); + } else { + buf.append("(reseved)"); + } + + return buf.toString(); + } +} Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/CompositeTagHandler.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/CompositeTagHandler.java?rev=1722029&view=auto ============================================================================== --- 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/CompositeTagHandler.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/CompositeTagHandler.java Mon Dec 28 23:22:46 2015 @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.mp3; + +import java.util.Collections; +import java.util.List; + +/** + * Takes an array of {@link ID3Tags} in preference order, and when asked for + * a given tag, will return it from the first {@link ID3Tags} that has it. 
+ */ +public class CompositeTagHandler implements ID3Tags { + + private ID3Tags[] tags; + + public CompositeTagHandler(ID3Tags[] tags) { + this.tags = tags; + } + + public boolean getTagsPresent() { + for (ID3Tags tag : tags) { + if (tag.getTagsPresent()) { + return true; + } + } + return false; + } + + public String getTitle() { + for (ID3Tags tag : tags) { + if (tag.getTitle() != null) { + return tag.getTitle(); + } + } + return null; + } + + public String getArtist() { + for (ID3Tags tag : tags) { + if (tag.getArtist() != null) { + return tag.getArtist(); + } + } + return null; + } + + public String getAlbum() { + for (ID3Tags tag : tags) { + if (tag.getAlbum() != null) { + return tag.getAlbum(); + } + } + return null; + } + + public String getComposer() { + for (ID3Tags tag : tags) { + if (tag.getComposer() != null) { + return tag.getComposer(); + } + } + return null; + } + + public String getYear() { + for (ID3Tags tag : tags) { + if (tag.getYear() != null) { + return tag.getYear(); + } + } + return null; + } + + public List<ID3Comment> getComments() { + for (ID3Tags tag : tags) { + List<ID3Comment> comments = tag.getComments(); + if (comments != null && comments.size() > 0) { + return comments; + } + } + return Collections.emptyList(); + } + + public String getGenre() { + for (ID3Tags tag : tags) { + if (tag.getGenre() != null) { + return tag.getGenre(); + } + } + return null; + } + + public String getTrackNumber() { + for (ID3Tags tag : tags) { + if (tag.getTrackNumber() != null) { + return tag.getTrackNumber(); + } + } + return null; + } + + public String getAlbumArtist() { + for (ID3Tags tag : tags) { + if (tag.getAlbumArtist() != null) { + return tag.getAlbumArtist(); + } + } + return null; + } + + public String getDisc() { + for (ID3Tags tag : tags) { + if (tag.getDisc() != null) { + return tag.getDisc(); + } + } + return null; + } + + public String getCompilation() { + for (ID3Tags tag : tags) { + if (tag.getCompilation() != null) { + return 
tag.getCompilation(); + } + } + return null; + } +} Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java?rev=1722029&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java Mon Dec 28 23:22:46 2015 @@ -0,0 +1,254 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.mp3; + +import java.util.List; + +/** + * Interface that defines the common interface for ID3 tag parsers, + * such as ID3v1 and ID3v2.3. + * Implementations should return NULL if the file lacks a given + * tag, or if the tag isn't defined for the version. + * + * Note that so far, only the ID3v1 core tags are listed here. In + * future, we may wish to add more to cover the extra tags that + * our ID3v2 handlers can produce. 
/**
 * Common contract implemented by the ID3 tag parsers (ID3v1, ID3v2.2,
 * ID3v2.3, ID3v2.4, ...).
 * <p>
 * Implementations should return {@code null} from a getter when the file
 * lacks the given tag, or when the tag is not defined for that version.
 * <p>
 * So far only the ID3v1 core tags (plus a few common ID3v2 additions)
 * are exposed here.
 */
public interface ID3Tags {
    /**
     * List of predefined genres, indexed by the numeric genre ID.
     * The final, empty entry is a sentinel that lets callers clamp
     * out-of-range IDs to "no genre".
     *
     * @see <a href="http://www.id3.org/id3v2-00">id3.org: id3v2-00</a>
     */
    String[] GENRES = new String[] {
        /*  0 */ "Blues",
        /*  1 */ "Classic Rock",
        /*  2 */ "Country",
        /*  3 */ "Dance",
        /*  4 */ "Disco",
        /*  5 */ "Funk",
        /*  6 */ "Grunge",
        /*  7 */ "Hip-Hop",
        /*  8 */ "Jazz",
        /*  9 */ "Metal",
        /* 10 */ "New Age",
        /* 11 */ "Oldies",
        /* 12 */ "Other",
        /* 13 */ "Pop",
        /* 14 */ "R&B",
        /* 15 */ "Rap",
        /* 16 */ "Reggae",
        /* 17 */ "Rock",
        /* 18 */ "Techno",
        /* 19 */ "Industrial",
        /* 20 */ "Alternative",
        /* 21 */ "Ska",
        /* 22 */ "Death Metal",
        /* 23 */ "Pranks",
        /* 24 */ "Soundtrack",
        /* 25 */ "Euro-Techno",
        /* 26 */ "Ambient",
        /* 27 */ "Trip-Hop",
        /* 28 */ "Vocal",
        /* 29 */ "Jazz+Funk",
        /* 30 */ "Fusion",
        /* 31 */ "Trance",
        /* 32 */ "Classical",
        /* 33 */ "Instrumental",
        /* 34 */ "Acid",
        /* 35 */ "House",
        /* 36 */ "Game",
        /* 37 */ "Sound Clip",
        /* 38 */ "Gospel",
        /* 39 */ "Noise",
        /* 40 */ "AlternRock",
        /* 41 */ "Bass",
        /* 42 */ "Soul",
        /* 43 */ "Punk",
        /* 44 */ "Space",
        /* 45 */ "Meditative",
        /* 46 */ "Instrumental Pop",
        /* 47 */ "Instrumental Rock",
        /* 48 */ "Ethnic",
        /* 49 */ "Gothic",
        /* 50 */ "Darkwave",
        /* 51 */ "Techno-Industrial",
        /* 52 */ "Electronic",
        /* 53 */ "Pop-Folk",
        /* 54 */ "Eurodance",
        /* 55 */ "Dream",
        /* 56 */ "Southern Rock",
        /* 57 */ "Comedy",
        /* 58 */ "Cult",
        /* 59 */ "Gangsta",
        /* 60 */ "Top 40",
        /* 61 */ "Christian Rap",
        /* 62 */ "Pop/Funk",
        /* 63 */ "Jungle",
        /* 64 */ "Native American",
        /* 65 */ "Cabaret",
        /* 66 */ "New Wave",
        /* 67 */ "Psychadelic",
        /* 68 */ "Rave",
        /* 69 */ "Showtunes",
        /* 70 */ "Trailer",
        /* 71 */ "Lo-Fi",
        /* 72 */ "Tribal",
        /* 73 */ "Acid Punk",
        /* 74 */ "Acid Jazz",
        /* 75 */ "Polka",
        /* 76 */ "Retro",
        /* 77 */ "Musical",
        /* 78 */ "Rock & Roll",
        /* 79 */ "Hard Rock",
        /* 80 */ "Folk",
        /* 81 */ "Folk-Rock",
        /* 82 */ "National Folk",
        /* 83 */ "Swing",
        /* 84 */ "Fast Fusion",
        /* 85 */ "Bebob",
        /* 86 */ "Latin",
        /* 87 */ "Revival",
        /* 88 */ "Celtic",
        /* 89 */ "Bluegrass",
        /* 90 */ "Avantgarde",
        /* 91 */ "Gothic Rock",
        /* 92 */ "Progressive Rock",
        /* 93 */ "Psychedelic Rock",
        /* 94 */ "Symphonic Rock",
        /* 95 */ "Slow Rock",
        /* 96 */ "Big Band",
        /* 97 */ "Chorus",
        /* 98 */ "Easy Listening",
        /* 99 */ "Acoustic",
        /* 100 */ "Humour",
        /* 101 */ "Speech",
        /* 102 */ "Chanson",
        /* 103 */ "Opera",
        /* 104 */ "Chamber Music",
        /* 105 */ "Sonata",
        /* 106 */ "Symphony",
        /* 107 */ "Booty Bass",
        /* 108 */ "Primus",
        /* 109 */ "Porn Groove",
        /* 110 */ "Satire",
        /* 111 */ "Slow Jam",
        /* 112 */ "Club",
        /* 113 */ "Tango",
        /* 114 */ "Samba",
        /* 115 */ "Folklore",
        /* 116 */ "Ballad",
        /* 117 */ "Power Ballad",
        /* 118 */ "Rhythmic Soul",
        /* 119 */ "Freestyle",
        /* 120 */ "Duet",
        /* 121 */ "Punk Rock",
        /* 122 */ "Drum Solo",
        /* 123 */ "A capella",
        /* 124 */ "Euro-House",
        /* 125 */ "Dance Hall",
        /* sentinel */ ""
    };

    /**
     * Does the file contain this kind of tags?
     */
    boolean getTagsPresent();

    /**
     * The title of the track
     */
    String getTitle();

    /**
     * The Artist for the track
     */
    String getArtist();

    /**
     * The Artist for the overall album / compilation of albums
     */
    String getAlbumArtist();

    /**
     * The name of the album
     */
    String getAlbum();

    /**
     * The composer of the track
     */
    String getComposer();

    /**
     * The compilation value of the track, if the tag version has one
     */
    String getCompilation();

    /**
     * Retrieves the comments, if any.
     * Files may have more than one comment, but normally only
     * one with any language/description pair.
     */
    List<ID3Comment> getComments();

    /**
     * The genre of the track
     */
    String getGenre();

    /**
     * The year of the track, as text
     */
    String getYear();

    /**
     * The number of the track within the album / recording
     */
    String getTrackNumber();

    /**
     * The number of the disc this belongs to, within the set
     */
    String getDisc();

    /**
     * Represents a comment in ID3 (especially ID3 v2), which is
     * made up of several parts: an optional language, an optional
     * description and the comment text itself.
     * Instances are immutable.
     */
    public static class ID3Comment {
        private final String language;
        private final String description;
        private final String text;

        /**
         * Creates an ID3 v1 style comment tag, which has only text
         * (language and description stay {@code null}).
         */
        public ID3Comment(String id3v1Text) {
            this(null, null, id3v1Text);
        }

        /**
         * Creates an ID3 v2 style comment tag
         */
        public ID3Comment(String language, String description, String text) {
            this.language = language;
            this.description = description;
            this.text = text;
        }

        /**
         * Gets the language, if present
         */
        public String getLanguage() {
            return language;
        }

        /**
         * Gets the description, if present
         */
        public String getDescription() {
            return description;
        }

        /**
         * Gets the text, if present
         */
        public String getText() {
            return text;
        }
    }
}
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.mp3; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.List; + +import org.apache.tika.exception.TikaException; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +import static java.nio.charset.StandardCharsets.ISO_8859_1; + +/** + * This is used to parse ID3 Version 1 Tag information from an MP3 file, + * if available. + * + * @see <a href="http://www.id3.org/ID3v1">MP3 ID3 Version 1 specification</a> + */ +public class ID3v1Handler implements ID3Tags { + private String title; + private String artist; + private String album; + private String year; + private ID3Comment comment; + private String genre; + private String trackNumber; + + boolean found = false; + + public ID3v1Handler(InputStream stream, ContentHandler handler) + throws IOException, SAXException, TikaException { + this(LyricsHandler.getSuffix(stream, 128)); + } + + /** + * Creates from the last 128 bytes of a stream. 
+ * @param tagData Must be the last 128 bytes + */ + protected ID3v1Handler(byte[] tagData) + throws IOException, SAXException, TikaException { + if (tagData.length == 128 + && tagData[0] == 'T' && tagData[1] == 'A' && tagData[2] == 'G') { + found = true; + + title = getString(tagData, 3, 33); + artist = getString(tagData, 33, 63); + album = getString(tagData, 63, 93); + year = getString(tagData, 93, 97); + + String commentStr = getString(tagData, 97, 127); + comment = new ID3Comment(commentStr); + + int genreID = (int) tagData[127] & 0xff; // unsigned byte + genre = GENRES[Math.min(genreID, GENRES.length - 1)]; + + // ID3v1.1 Track addition + // If the last two bytes of the comment field are zero and + // non-zero, then the last byte is the track number + if (tagData[125] == 0 && tagData[126] != 0) { + int trackNum = (int) tagData[126] & 0xff; + trackNumber = Integer.toString(trackNum); + } + } + } + + + public boolean getTagsPresent() { + return found; + } + + public String getTitle() { + return title; + } + + public String getArtist() { + return artist; + } + + public String getAlbum() { + return album; + } + + public String getYear() { + return year; + } + + public List<ID3Comment> getComments() { + return Arrays.asList(comment); + } + + public String getGenre() { + return genre; + } + + public String getTrackNumber() { + return trackNumber; + } + + /** + * ID3v1 doesn't have composers, + * so returns null; + */ + public String getComposer() { + return null; + } + + /** + * ID3v1 doesn't have album-wide artists, + * so returns null; + */ + public String getAlbumArtist() { + return null; + } + + /** + * ID3v1 doesn't have disc numbers, + * so returns null; + */ + public String getDisc() { + return null; + } + + /** + * ID3v1 doesn't have compilations, + * so returns null; + */ + public String getCompilation() { + return null; + } + + /** + * Returns the identified ISO-8859-1 substring from the given byte buffer. 
+ * The return value is the zero-terminated substring retrieved from + * between the given start and end positions in the given byte buffer. + * Extra whitespace (and control characters) from the beginning and the + * end of the substring is removed. + * + * @param buffer byte buffer + * @param start start index of the substring + * @param end end index of the substring + * @return the identified substring + * @throws TikaException if the ISO-8859-1 encoding is not available + */ + private static String getString(byte[] buffer, int start, int end) + throws TikaException { + // Find the zero byte that marks the end of the string + int zero = start; + while (zero < end && buffer[zero] != 0) { + zero++; + } + + // Skip trailing whitespace + end = zero; + while (start < end && buffer[end - 1] <= ' ') { + end--; + } + + // Skip leading whitespace + while (start < end && buffer[start] <= ' ') { + start++; + } + + // Return the remaining substring + return new String(buffer, start, end - start, ISO_8859_1); + } +} Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java?rev=1722029&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java Mon Dec 28 23:22:46 2015 @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.mp3; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.parser.mp3.ID3v2Frame.RawTag; +import org.apache.tika.parser.mp3.ID3v2Frame.RawTagIterator; +import org.xml.sax.SAXException; + +/** + * This is used to parse ID3 Version 2.2 Tag information from an MP3 file, + * if available. 
+ * + * @see <a href="http://id3lib.sourceforge.net/id3/id3v2-00.txt">MP3 ID3 Version 2.2 specification</a> + */ +public class ID3v22Handler implements ID3Tags { + private String title; + private String artist; + private String album; + private String year; + private String composer; + private String genre; + private String trackNumber; + private String albumArtist; + private String disc; + private List<ID3Comment> comments = new ArrayList<ID3Comment>(); + + public ID3v22Handler(ID3v2Frame frame) + throws IOException, SAXException, TikaException { + RawTagIterator tags = new RawV22TagIterator(frame); + while (tags.hasNext()) { + RawTag tag = tags.next(); + if (tag.name.equals("TT2")) { + title = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TP1")) { + artist = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TP2")) { + albumArtist = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TAL")) { + album = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TYE")) { + year = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TCM")) { + composer = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("COM")) { + comments.add( getComment(tag.data, 0, tag.data.length) ); + } else if (tag.name.equals("TRK")) { + trackNumber = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TPA")) { + disc = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TCO")) { + genre = extractGenre( getTagString(tag.data, 0, tag.data.length) ); + } + } + } + + private String getTagString(byte[] data, int offset, int length) { + return ID3v2Frame.getTagString(data, offset, length); + } + private ID3Comment getComment(byte[] data, int offset, int length) { + return ID3v2Frame.getComment(data, offset, length); + } + + protected static String extractGenre(String rawGenre) { + int open = rawGenre.indexOf("("); + int 
close = rawGenre.indexOf(")"); + if (open == -1 && close == -1) { + return rawGenre; + } else if (open < close) { + String genreStr = rawGenre.substring(0, open).trim(); + try { + int genreID = Integer.parseInt(rawGenre.substring(open+1, close)); + return ID3Tags.GENRES[genreID]; + } catch(ArrayIndexOutOfBoundsException invalidNum) { + return genreStr; + } catch(NumberFormatException notANum) { + return genreStr; + } + } else { + return null; + } + } + + public boolean getTagsPresent() { + return true; + } + + public String getTitle() { + return title; + } + + public String getArtist() { + return artist; + } + + public String getAlbum() { + return album; + } + + public String getYear() { + return year; + } + + public String getComposer() { + return composer; + } + + public List<ID3Comment> getComments() { + return comments; + } + + public String getGenre() { + return genre; + } + + public String getTrackNumber() { + return trackNumber; + } + + public String getAlbumArtist() { + return albumArtist; + } + + public String getDisc() { + return disc; + } + + /** + * ID3v22 doesn't have compilations, + * so returns null; + */ + public String getCompilation() { + return null; + } + + private class RawV22TagIterator extends RawTagIterator { + private RawV22TagIterator(ID3v2Frame frame) { + frame.super(3, 3, 1, 0); + } + } +} Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java?rev=1722029&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java Mon Dec 28 
23:22:46 2015 @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.mp3; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.parser.mp3.ID3v2Frame.RawTag; +import org.apache.tika.parser.mp3.ID3v2Frame.RawTagIterator; +import org.xml.sax.SAXException; + +/** + * This is used to parse ID3 Version 2.3 Tag information from an MP3 file, + * if available. 
+ * + * @see <a href="http://id3lib.sourceforge.net/id3/id3v2.3.0.html">MP3 ID3 Version 2.3 specification</a> + */ +public class ID3v23Handler implements ID3Tags { + private String title; + private String artist; + private String album; + private String year; + private String composer; + private String genre; + private String trackNumber; + private String albumArtist; + private String disc; + private String compilation; + private List<ID3Comment> comments = new ArrayList<ID3Comment>(); + + public ID3v23Handler(ID3v2Frame frame) + throws IOException, SAXException, TikaException { + RawTagIterator tags = new RawV23TagIterator(frame); + while (tags.hasNext()) { + RawTag tag = tags.next(); + if (tag.name.equals("TIT2")) { + title = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TPE1")) { + artist = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TPE2")) { + albumArtist = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TALB")) { + album = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TYER")) { + year = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TCOM")) { + composer = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("COMM")) { + comments.add( getComment(tag.data, 0, tag.data.length) ); + } else if (tag.name.equals("TRCK")) { + trackNumber = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TPOS")) { + disc = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TCMP")) { + compilation = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TCON")) { + genre = ID3v22Handler.extractGenre( getTagString(tag.data, 0, tag.data.length) ); + } + } + } + + private String getTagString(byte[] data, int offset, int length) { + return ID3v2Frame.getTagString(data, offset, length); + } + private ID3Comment getComment(byte[] data, int offset, int length) { + 
return ID3v2Frame.getComment(data, offset, length); + } + + public boolean getTagsPresent() { + return true; + } + + public String getTitle() { + return title; + } + + public String getArtist() { + return artist; + } + + public String getAlbum() { + return album; + } + + public String getYear() { + return year; + } + + public String getComposer() { + return composer; + } + + public List<ID3Comment> getComments() { + return comments; + } + + public String getGenre() { + return genre; + } + + public String getTrackNumber() { + return trackNumber; + } + + public String getAlbumArtist() { + return albumArtist; + } + + public String getDisc() { + return disc; + } + + public String getCompilation() { + return compilation; + } + + private class RawV23TagIterator extends RawTagIterator { + private RawV23TagIterator(ID3v2Frame frame) { + frame.super(4, 4, 1, 2); + } + } +} Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java?rev=1722029&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java Mon Dec 28 23:22:46 2015 @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.mp3; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.parser.mp3.ID3v2Frame.RawTag; +import org.apache.tika.parser.mp3.ID3v2Frame.RawTagIterator; +import org.xml.sax.SAXException; + +/** + * This is used to parse ID3 Version 2.4 Tag information from an MP3 file, + * if available. + * + * @see <a href="http://www.id3.org/id3v2.4.0-structure">MP3 ID3 Version 2.4 specification</a> + * @see <a href="http://www.id3.org/id3v2.4.0-frames">MP3 ID3 Version 2.4 frames/tags</a> + */ +public class ID3v24Handler implements ID3Tags { + private String title; + private String artist; + private String album; + private String year; + private String composer; + private String genre; + private String trackNumber; + private String albumArtist; + private String disc; + private String compilation; + private List<ID3Comment> comments = new ArrayList<ID3Comment>(); + + public ID3v24Handler(ID3v2Frame frame) + throws IOException, SAXException, TikaException { + RawTagIterator tags = new RawV24TagIterator(frame); + while (tags.hasNext()) { + RawTag tag = tags.next(); + if (tag.name.equals("TIT2")) { + title = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TPE1")) { + artist = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TPE2")) { + albumArtist = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TALB")) { + album = getTagString(tag.data, 0, 
tag.data.length); + } else if (tag.name.equals("TYER")) { + year = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TDRC")) { + if(year == null) { + year = getTagString(tag.data, 0, tag.data.length); + } + } else if (tag.name.equals("TCOM")) { + composer = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("COMM")) { + comments.add( getComment(tag.data, 0, tag.data.length) ); + } else if (tag.name.equals("TRCK")) { + trackNumber = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TPOS")) { + disc = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TCMP")) { + compilation = getTagString(tag.data, 0, tag.data.length); + } else if (tag.name.equals("TCON")) { + genre = ID3v22Handler.extractGenre( getTagString(tag.data, 0, tag.data.length) ); + } + } + } + + private String getTagString(byte[] data, int offset, int length) { + return ID3v2Frame.getTagString(data, offset, length); + } + private ID3Comment getComment(byte[] data, int offset, int length) { + return ID3v2Frame.getComment(data, offset, length); + } + + public boolean getTagsPresent() { + return true; + } + + public String getTitle() { + return title; + } + + public String getArtist() { + return artist; + } + + public String getAlbum() { + return album; + } + + public String getYear() { + return year; + } + + public String getComposer() { + return composer; + } + + public List<ID3Comment> getComments() { + return comments; + } + + public String getGenre() { + return genre; + } + + public String getTrackNumber() { + return trackNumber; + } + + public String getAlbumArtist() { + return albumArtist; + } + + public String getDisc() { + return disc; + } + + public String getCompilation() { + return compilation; + } + + private class RawV24TagIterator extends RawTagIterator { + private RawV24TagIterator(ID3v2Frame frame) { + frame.super(4, 4, 1, 2); + } + } +} Added: 
tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java?rev=1722029&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/ID3v2Frame.java Mon Dec 28 23:22:46 2015 @@ -0,0 +1,424 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.parser.mp3; + +import java.io.IOException; +import java.io.InputStream; +import java.io.PushbackInputStream; +import java.io.UnsupportedEncodingException; +import java.util.Iterator; + +import org.apache.tika.parser.mp3.ID3Tags.ID3Comment; + +import static java.nio.charset.StandardCharsets.ISO_8859_1; + +/** + * A frame of ID3v2 data, which is then passed to a handler to + * be turned into useful data. 
+ */ +public class ID3v2Frame implements MP3Frame { + private int majorVersion; + private int minorVersion; + private int flags; + private int length; + /** Excludes the header size part */ + private byte[] extendedHeader; + private byte[] data; + + public int getMajorVersion() { + return majorVersion; + } + + public int getMinorVersion() { + return minorVersion; + } + + public int getFlags() { + return flags; + } + + public int getLength() { + return length; + } + + public byte[] getExtendedHeader() { + return extendedHeader; + } + + public byte[] getData() { + return data; + } + + /** + * Returns the next ID3v2 Frame in + * the file, or null if the next batch of data + * doesn't correspond to either an ID3v2 header. + * If no ID3v2 frame could be detected and the passed in input stream is a + * {@code PushbackInputStream}, the bytes read so far are pushed back so + * that they can be read again. + * ID3v2 Frames should come before all Audio ones. + */ + public static MP3Frame createFrameIfPresent(InputStream inp) + throws IOException { + int h1 = inp.read(); + int h2 = inp.read(); + int h3 = inp.read(); + + // Is it an ID3v2 Frame? + if (h1 == (int)'I' && h2 == (int)'D' && h3 == (int)'3') { + int majorVersion = inp.read(); + int minorVersion = inp.read(); + if (majorVersion == -1 || minorVersion == -1) { + pushBack(inp, h1, h2, h3, majorVersion, minorVersion); + return null; + } + return new ID3v2Frame(majorVersion, minorVersion, inp); + } + + // Not a frame header + pushBack(inp, h1, h2, h3); + return null; + } + + /** + * Pushes bytes back into the stream if possible. This method is called if + * no ID3v2 header could be found at the current stream position. + * + * @param inp the input stream + * @param bytes the bytes to be pushed back + * @throws IOException if an error occurs + */ + private static void pushBack(InputStream inp, int... 
bytes) + throws IOException + { + if (inp instanceof PushbackInputStream) + { + byte[] buf = new byte[bytes.length]; + for (int i = 0; i < bytes.length; i++) + { + buf[i] = (byte) bytes[i]; + } + ((PushbackInputStream) inp).unread(buf); + } + } + + private ID3v2Frame(int majorVersion, int minorVersion, InputStream inp) + throws IOException { + this.majorVersion = majorVersion; + this.minorVersion = minorVersion; + + // Get the flags and the length + flags = inp.read(); + length = get7BitsInt(readFully(inp, 4), 0); + + // Do we have an extended header? + if ((flags & 0x02) == 0x02) { + int size = getInt(readFully(inp, 4)); + extendedHeader = readFully(inp, size); + } + + // Get the frame's data, or at least as much + // of it as we could do + data = readFully(inp, length, false); + } + + protected static int getInt(byte[] data) { + return getInt(data, 0); + } + + protected static int getInt(byte[] data, int offset) { + int b0 = data[offset+0] & 0xFF; + int b1 = data[offset+1] & 0xFF; + int b2 = data[offset+2] & 0xFF; + int b3 = data[offset+3] & 0xFF; + return (b0 << 24) + (b1 << 16) + (b2 << 8) + (b3 << 0); + } + + protected static int getInt3(byte[] data, int offset) { + int b0 = data[offset+0] & 0xFF; + int b1 = data[offset+1] & 0xFF; + int b2 = data[offset+2] & 0xFF; + return (b0 << 16) + (b1 << 8) + (b2 << 0); + } + + protected static int getInt2(byte[] data, int offset) { + int b0 = data[offset+0] & 0xFF; + int b1 = data[offset+1] & 0xFF; + return (b0 << 8) + (b1 << 0); + } + + /** + * AKA a Synchsafe integer. + * 4 bytes hold a 28 bit number. The highest + * bit in each byte is always 0 and always ignored. 
+ */ + protected static int get7BitsInt(byte[] data, int offset) { + int b0 = data[offset+0] & 0x7F; + int b1 = data[offset+1] & 0x7F; + int b2 = data[offset+2] & 0x7F; + int b3 = data[offset+3] & 0x7F; + return (b0 << 21) + (b1 << 14) + (b2 << 7) + (b3 << 0); + } + + protected static byte[] readFully(InputStream inp, int length) + throws IOException { + return readFully(inp, length, true); + } + protected static byte[] readFully(InputStream inp, int length, boolean shortDataIsFatal) + throws IOException { + byte[] b = new byte[length]; + + int pos = 0; + int read; + while (pos < length) { + read = inp.read(b, pos, length-pos); + if (read == -1) { + if(shortDataIsFatal) { + throw new IOException("Tried to read " + length + " bytes, but only " + pos + " bytes present"); + } else { + // Give them what we found + // TODO Log the short read + return b; + } + } + pos += read; + } + + return b; + } + + protected static class TextEncoding { + public final boolean doubleByte; + public final String encoding; + private TextEncoding(String encoding, boolean doubleByte) { + this.doubleByte = doubleByte; + this.encoding = encoding; + } + } + protected static final TextEncoding[] encodings = new TextEncoding[] { + new TextEncoding("ISO-8859-1", false), + new TextEncoding("UTF-16", true), // With BOM + new TextEncoding("UTF-16BE", true), // Without BOM + new TextEncoding("UTF-8", false) + }; + + /** + * Returns the (possibly null padded) String at the given offset and + * length. String encoding is held in the first byte; + */ + protected static String getTagString(byte[] data, int offset, int length) { + int actualLength = length; + if (actualLength == 0) { + return ""; + } + if (actualLength == 1 && data[offset] == 0) { + return ""; + } + + // Does it have an encoding flag? 
+ // Detect by the first byte being sub 0x20 + TextEncoding encoding = encodings[0]; + byte maybeEncodingFlag = data[offset]; + if (maybeEncodingFlag >= 0 && maybeEncodingFlag < encodings.length) { + offset++; + actualLength--; + encoding = encodings[maybeEncodingFlag]; + } + + // Trim off null termination / padding (as present) + while (encoding.doubleByte && actualLength >= 2 && data[offset+actualLength-1] == 0 && data[offset+actualLength-2] == 0) { + actualLength -= 2; + } + while (!encoding.doubleByte && actualLength >= 1 && data[offset+actualLength-1] == 0) { + actualLength--; + } + if (actualLength == 0) { + return ""; + } + + // TIKA-1024: If it's UTF-16 (with BOM) and all we + // have is a naked BOM then short-circuit here + // (return empty string), because new String(..) + // gives different results on different JVMs + if (encoding.encoding.equals("UTF-16") && actualLength == 2 && + ((data[offset] == (byte) 0xff && data[offset+1] == (byte) 0xfe) || + (data[offset] == (byte) 0xfe && data[offset+1] == (byte) 0xff))) { + return ""; + } + + try { + // Build the base string + return new String(data, offset, actualLength, encoding.encoding); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException( + "Core encoding " + encoding.encoding + " is not available", e); + } + } + /** + * Builds up the ID3 comment, by parsing and extracting + * the comment string parts from the given data. 
+ */ + protected static ID3Comment getComment(byte[] data, int offset, int length) { + // Comments must have an encoding + int encodingFlag = data[offset]; + if (encodingFlag >= 0 && encodingFlag < encodings.length) { + // Good, valid flag + } else { + // Invalid string + return null; + } + + TextEncoding encoding = encodings[encodingFlag]; + + // First is a 3 byte language + String lang = getString(data, offset+1, 3); + + // After that we have [Desc]\0(\0)[Text] + int descStart = offset+4; + int textStart = -1; + String description = null; + String text = null; + + // Find where the description ends + try { + for (int i=descStart; i<offset+length; i++) { + if (encoding.doubleByte && data[i]==0 && data[i+1] == 0) { + // Handle LE vs BE on low byte text + if (i+2 < offset+length && data[i+1] == 0 && data[i+2] == 0) { + i++; + } + textStart = i+2; + description = new String(data, descStart, i-descStart, encoding.encoding); + break; + } + if (!encoding.doubleByte && data[i]==0) { + textStart = i+1; + description = new String(data, descStart, i-descStart, encoding.encoding); + break; + } + } + + // Did we find the end? + if (textStart > -1) { + text = new String(data, textStart, offset+length-textStart, encoding.encoding); + } else { + // Assume everything is the text + text = new String(data, descStart, offset+length-descStart, encoding.encoding); + } + + // Return + return new ID3Comment(lang, description, text); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException( + "Core encoding " + encoding.encoding + " is not available", e); + } + } + + /** + * Returns the String at the given + * offset and length. Strings are ISO-8859-1 + */ + protected static String getString(byte[] data, int offset, int length) { + return new String(data, offset, length, ISO_8859_1); + } + + + /** + * Iterates over id3v2 raw tags. + * Create an instance of this that configures the + * various length and multipliers. 
+ */ + protected class RawTagIterator implements Iterator<RawTag> { + private int nameLength; + private int sizeLength; + private int sizeMultiplier; + private int flagLength; + + private int offset = 0; + + protected RawTagIterator( + int nameLength, int sizeLength, int sizeMultiplier, + int flagLength) { + this.nameLength = nameLength; + this.sizeLength = sizeLength; + this.sizeMultiplier = sizeMultiplier; + this.flagLength = flagLength; + } + + public boolean hasNext() { + // Check for padding at the end + return offset < data.length && data[offset] != 0; + } + + public RawTag next() { + RawTag tag = new RawTag(nameLength, sizeLength, sizeMultiplier, + flagLength, data, offset); + offset += tag.getSize(); + return tag; + } + + public void remove() { + } + + } + + protected static class RawTag { + private int headerSize; + protected String name; + protected int flag; + protected byte[] data; + + private RawTag( + int nameLength, int sizeLength, int sizeMultiplier, + int flagLength, byte[] frameData, int offset) { + headerSize = nameLength + sizeLength + flagLength; + + // Name, normally 3 or 4 bytes + name = getString(frameData, offset, nameLength); + + // Size + int rawSize; + if (sizeLength == 3) { + rawSize = getInt3(frameData, offset+nameLength); + } else { + rawSize = getInt(frameData, offset+nameLength); + } + int size = rawSize * sizeMultiplier; + + // Flag + if (flagLength > 0) { + if (flagLength == 1) { + flag = (int)frameData[offset+nameLength+sizeLength]; + } else { + flag = getInt2(frameData, offset+nameLength+sizeLength); + } + } + + // Now data + int copyFrom = offset+nameLength+sizeLength+flagLength; + size = Math.max(0, Math.min(size, frameData.length-copyFrom)); // TIKA-1218, prevent negative size for malformed files. 
+ data = new byte[size]; + System.arraycopy(frameData, copyFrom, data, 0, size); + } + + protected int getSize() { + return headerSize + data.length; + } + + } + +} Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java?rev=1722029&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java Mon Dec 28 23:22:46 2015 @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.tika.parser.mp3; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.tika.exception.TikaException; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +import static java.nio.charset.StandardCharsets.US_ASCII; +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * This is used to parse Lyrics3 tag information + * from an MP3 file, if available. + * Handles lyrics tags of up to 10kb in size. + * Will process any ID3v1 tag data if present. + * Ignores extended ID3v1 data in the lyrics block + * + * @see <a href="http://www.id3.org/Lyrics3v2">Lyrics3 v2.0 specification</a> + */ +public class LyricsHandler { + boolean foundLyrics = false; + String lyricsText = null; + ID3v1Handler id3v1 = null; + + public LyricsHandler(InputStream stream, ContentHandler handler) + throws IOException, SAXException, TikaException { + this(getSuffix(stream, 10240+128)); + } + + /** + * Looks for the Lyrics data, which will be + * just before the ID3v1 data (if present), + * and process it. + * Also sets things up for the ID3v1 + * processing if required. + * Creates from the last 128 bytes of a stream. + */ + protected LyricsHandler(byte[] tagData) + throws IOException, SAXException, TikaException { + if(tagData.length < 128) { + return; + } + + // Is there ID3v1 data? + byte[] last128 = new byte[128]; + System.arraycopy(tagData, tagData.length-128, last128, 0, 128); + id3v1 = new ID3v1Handler(last128); + + if(tagData.length < 137) { + return; + } + + // Are there lyrics? 
Look for the closing Lyrics tag + // at the end to decide if there is any + int lookat = tagData.length - 9; + if(id3v1.found) { + lookat -= 128; + } + if(tagData[lookat+0] == 'L' && tagData[lookat+1] == 'Y' && + tagData[lookat+2] == 'R' && tagData[lookat+3] == 'I' && + tagData[lookat+4] == 'C' && tagData[lookat+5] == 'S' && + tagData[lookat+6] == '2' && tagData[lookat+7] == '0' && + tagData[lookat+8] == '0') { + foundLyrics = true; + + // The length (6 bytes) comes just before LYRICS200, and is the + // size including the LYRICSBEGIN but excluding the + // length+LYRICS200 at the end. + int length = Integer.parseInt( + new String(tagData, lookat-6, 6, UTF_8) + ); + + String lyrics = new String( + tagData, lookat-length+5, length-11, + US_ASCII + ); + + // Tags are a 3 letter code, 5 digit length, then data + int pos = 0; + while(pos < lyrics.length()-8) { + String tagName = lyrics.substring(pos, pos+3); + int tagLen = Integer.parseInt( + lyrics.substring(pos+3, pos+8) + ); + int startPos = pos + 8; + int endPos = startPos + tagLen; + + if(tagName.equals("LYR")) { + lyricsText = lyrics.substring(startPos, endPos); + } + + pos = endPos; + } + } + } + + public boolean hasID3v1() { + if(id3v1 == null || id3v1.found == false) { + return false; + } + return true; + } + public boolean hasLyrics() { + return lyricsText != null && lyricsText.length() > 0; + } + + /** + * Reads and returns the last <code>length</code> bytes from the + * given stream. + * @param stream input stream + * @param length number of bytes from the end to read and return + * @return stream the <code>InputStream</code> to read from. + * @throws IOException if the stream could not be read from. 
+ */ + protected static byte[] getSuffix(InputStream stream, int length) + throws IOException { + byte[] buffer = new byte[2 * length]; + int bytesInBuffer = 0; + + int n = stream.read(buffer); + while (n != -1) { + bytesInBuffer += n; + if (bytesInBuffer == buffer.length) { + System.arraycopy(buffer, bytesInBuffer - length, buffer, 0, length); + bytesInBuffer = length; + } + n = stream.read(buffer, bytesInBuffer, buffer.length - bytesInBuffer); + } + + if (bytesInBuffer < length) { + length = bytesInBuffer; + } + + byte[] result = new byte[length]; + System.arraycopy(buffer, bytesInBuffer - length, result, 0, length); + return result; + } +} Added: tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MP3Frame.java URL: http://svn.apache.org/viewvc/tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MP3Frame.java?rev=1722029&view=auto ============================================================================== --- tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MP3Frame.java (added) +++ tika/branches/2.x/tika-parser-modules/tika-multimedia-module/src/main/java/org/apache/tika/parser/mp3/MP3Frame.java Mon Dec 28 23:22:46 2015 @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
/**
 * Common type for the frames which make up an MP3 file,
 * for example a block of ID3v2 Tags or a piece of audio.
 * Declares no methods of its own.
 */
public interface MP3Frame {
}
