Repository: tika Updated Branches: refs/heads/2.x a1a2cdc61 -> 322b1c9f0
TIKA-1860 - Propose bundle artifact for multimedia module Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/c6d44760 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/c6d44760 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/c6d44760 Branch: refs/heads/2.x Commit: c6d4476003c9738617f323756b0211e310728c18 Parents: 6178e98 Author: Bob Paulin <[email protected]> Authored: Thu Feb 18 19:25:53 2016 -0600 Committer: Bob Paulin <[email protected]> Committed: Thu Feb 18 19:25:53 2016 -0600 ---------------------------------------------------------------------- .gitignore | 3 +- .../tika/osgi/TikaAbstractBundleActivator.java | 12 +- tika-parser-modules/pom.xml | 60 +++++- .../tika-parser-multimedia-module/pom.xml | 83 ++++++++ .../module/multimedia/internal/Activator.java | 36 ++++ .../java/org/apache/tika/module/BundleIT.java | 203 +++++++++++++++++++ 6 files changed, 394 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/.gitignore ---------------------------------------------------------------------- diff --git a/.gitignore b/.gitignore index c262c68..7da9077 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,5 @@ target *.iws *.bin nbactions.xml -nb-configuration.xml \ No newline at end of file +nb-configuration.xml +/bin/ http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java ---------------------------------------------------------------------- diff --git a/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java b/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java index a6250e2..b959147 100644 --- a/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java +++ b/tika-core/src/main/java/org/apache/tika/osgi/TikaAbstractBundleActivator.java @@ -20,6 +20,7 @@ import java.util.Dictionary; import java.util.Enumeration; import java.util.Locale; import java.util.Properties; +import java.util.ServiceLoader; import org.apache.tika.parser.Parser; import org.osgi.framework.BundleActivator; @@ -37,8 +38,17 @@ public abstract class TikaAbstractBundleActivator implements BundleActivator { return serviceProps; } + + public void registerTikaParserServiceLoader(BundleContext context, ClassLoader loader) + { + ServiceLoader<Parser> serviceLoader = ServiceLoader.load(Parser.class, loader); + for(Parser currentParser: serviceLoader) + { + registerTikaService(context, currentParser, null); + } + } - public void registerTikaService(BundleContext context, Parser parserService, + void registerTikaService(BundleContext context, Parser parserService, Dictionary additionalServiceProperties) { String parserFullyClassifiedName = parserService.getClass().getCanonicalName().toLowerCase(Locale.US); http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-parser-modules/pom.xml ---------------------------------------------------------------------- diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml index 8e71c1b..61b42a0 100644 --- a/tika-parser-modules/pom.xml +++ b/tika-parser-modules/pom.xml @@ -59,9 +59,67 @@ </modules> <dependencies> + <dependency> + <groupId>org.osgi</groupId> + <artifactId>org.osgi.core</artifactId> + <scope>provided</scope> + <optional>true</optional> + </dependency> + <dependency> + <groupId>org.osgi</groupId> + <artifactId>org.osgi.compendium</artifactId> + <scope>provided</scope> + <optional>true</optional> + </dependency> <!-- Test dependencies --> <dependency> - <groupId>org.apache.tika</groupId> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-core</artifactId> + <version>1.7</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.ops4j.pax.exam</groupId> + <artifactId>pax-exam-junit4</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.ops4j.pax.exam</groupId> + <artifactId>pax-exam-container-native</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.felix</groupId> + <artifactId>org.apache.felix.framework</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.ops4j.pax.exam</groupId> + <artifactId>pax-exam-link-assembly</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.ops4j.pax.url</groupId> + <artifactId>pax-url-aether</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-simple</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>javax.inject</groupId> + <artifactId>javax.inject</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>${project.groupId}</groupId> <artifactId>tika-test-resources</artifactId> <version>${project.version}</version> <type>test-jar</type> http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-parser-modules/tika-parser-multimedia-module/pom.xml ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-multimedia-module/pom.xml b/tika-parser-modules/tika-parser-multimedia-module/pom.xml index f15f3bd..13a7705 100644 --- a/tika-parser-modules/tika-parser-multimedia-module/pom.xml +++ b/tika-parser-modules/tika-parser-multimedia-module/pom.xml @@ -22,6 +22,7 @@ <artifactId>tika-parser-multimedia-module</artifactId> <name>Apache Tika parser multimedia module</name> <url>http://tika.apache.org/</url> + <packaging>bundle</packaging> <properties> <metadata.extractor.version>2.8.0</metadata.extractor.version> @@ -121,6 +122,88 @@ <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-dependency-plugin</artifactId> </plugin> + <plugin> + <groupId>org.apache.felix</groupId> + <artifactId>maven-bundle-plugin</artifactId> + <extensions>true</extensions> + <configuration> + <classifier>bundle</classifier> + <instructions> + <Bundle-Activator>org.apache.tika.module.multimedia.internal.Activator</Bundle-Activator> + <_runsystempackages>com.sun.xml.bind.marshaller, + com.sun.xml.internal.bind.marshaller</_runsystempackages> + <Embed-Dependency> + metadata-extractor;inline=true, + xmpcore;inline=true, + commons-codec;inline=true, + commons-io;inline=true, + jempbox;inline=true, + fontbox;inline=true, + poi;inline=true, + isoparser;inline=true, + aspectjrt;inline=true + </Embed-Dependency> + <Embed-Transitive>true</Embed-Transitive> + <Export-Package> + org.apache.tika.parser.image.*, + org.apache.tika.parser.jpeg.*, + org.apache.tika.parser.audio.*, + org.apache.tika.parser.video.*, + org.apache.tika.parser.mp3.*, + org.apache.tika.parser.mp4.* + </Export-Package> + <Import-Package> + *, + com.adobe.xmp;resolution:=optional, + com.adobe.xmp.properties;resolution:=optional, + android.util;resolution:=optional + </Import-Package> + </instructions> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-jar-plugin</artifactId> + <executions> + <execution> + <phase>package</phase> + <goals> + <goal>jar</goal> + </goals> + <configuration> + <useDefaultManifestFile>true</useDefaultManifestFile> + <includes> + <include>org/apache/tika/**</include> + <include>META-INF/**</include> + </includes> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <artifactId>maven-failsafe-plugin</artifactId> + <executions> + <execution> + <goals> + <goal>integration-test</goal> + <goal>verify</goal> + </goals> + </execution> + </executions> + <configuration> + <systemPropertyVariables> + <org.ops4j.pax.logging.DefaultServiceLog.level> + WARN + </org.ops4j.pax.logging.DefaultServiceLog.level> + </systemPropertyVariables> + <systemProperties> + <property> + <name>project.bundle.file</name> + <value>target/${project.build.finalName}-bundle.jar</value> + </property> + </systemProperties> + </configuration> + </plugin> </plugins> </build> http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/module/multimedia/internal/Activator.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/module/multimedia/internal/Activator.java b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/module/multimedia/internal/Activator.java new file mode 100644 index 0000000..7f53312 --- /dev/null +++ b/tika-parser-modules/tika-parser-multimedia-module/src/main/java/org/apache/tika/module/multimedia/internal/Activator.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.module.multimedia.internal; + +import org.apache.tika.osgi.TikaAbstractBundleActivator; +import org.osgi.framework.BundleContext; + +public class Activator extends TikaAbstractBundleActivator { + + @Override + public void start(BundleContext context) throws Exception { + + registerTikaParserServiceLoader(context, Activator.class.getClassLoader()); + + } + + @Override + public void stop(BundleContext context) throws Exception { + + } + +} http://git-wip-us.apache.org/repos/asf/tika/blob/c6d44760/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/module/BundleIT.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/module/BundleIT.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/module/BundleIT.java new file mode 100644 index 0000000..5817691 --- /dev/null +++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/module/BundleIT.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.module; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNotNull; +import static org.ops4j.pax.exam.CoreOptions.bundle; +import static org.ops4j.pax.exam.CoreOptions.junitBundles; +import static org.ops4j.pax.exam.CoreOptions.options; +import static org.ops4j.pax.exam.CoreOptions.mavenBundle; + +import javax.inject.Inject; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.StringWriter; +import java.io.Writer; +import java.net.URISyntaxException; +import java.util.Dictionary; + +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.metadata.TikaCoreProperties; +import org.apache.tika.mime.MediaType; +import org.apache.tika.osgi.TikaService; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.sax.BodyContentHandler; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.ops4j.pax.exam.Configuration; +import org.ops4j.pax.exam.Option; +import org.ops4j.pax.exam.junit.PaxExam; +import org.ops4j.pax.exam.spi.reactors.ExamReactorStrategy; +import org.ops4j.pax.exam.spi.reactors.PerMethod; +import org.osgi.framework.Bundle; +import org.osgi.framework.BundleContext; +import org.xml.sax.ContentHandler; + +@RunWith(PaxExam.class) +@ExamReactorStrategy(PerMethod.class) +public class BundleIT { + + private static final String BUNDLE_JAR_SYS_PROP = "project.bundle.file"; + @Inject + private BundleContext bc; + + @Configuration + public Option[] configuration() throws IOException, URISyntaxException { + String bundleFileName = System.getProperty(BUNDLE_JAR_SYS_PROP); + return options(junitBundles(), mavenBundle("org.apache.tika", "tika-core"), + bundle(new File(bundleFileName).toURI().toString())); + } + + @Test + public void testBundleLoaded() throws Exception { + boolean hasCore = false, hasBundle = false; + for (Bundle b : bc.getBundles()) { + if ("org.apache.tika.core".equals(b.getSymbolicName())) { + hasCore = true; + assertEquals("Core not activated", Bundle.ACTIVE, b.getState()); + } + if ("org.apache.tika.parser-multimedia-module".equals(b.getSymbolicName())) { + hasBundle = true; + assertEquals("Bundle not activated", Bundle.ACTIVE, b.getState()); + } + } + assertTrue("Core bundle not found", hasCore); + assertTrue("Image bundle not found", hasBundle); + } + + @Test + public void testImageParser() throws Exception { + TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class)); + InputStream stream = bc.getBundle().getResource("/test-documents/testPNG.png").openStream(); + + assertNotNull(stream); + + Metadata metadata = new Metadata(); + TikaInputStream tikaStream = TikaInputStream.get(stream); + MediaType type = tikaService.detect(tikaStream, metadata); + + assertEquals("Media Type should be PNG", MediaType.image("png"), type); + + metadata.add(Metadata.CONTENT_TYPE, type.toString()); + Writer writer = new StringWriter(); + ContentHandler contentHandler = new BodyContentHandler(writer); + ParseContext context = new ParseContext(); + + tikaService.parse(tikaStream, contentHandler, metadata, context); + + assertEquals("Image Output Width Should Match", "100", metadata.get(Metadata.IMAGE_WIDTH)); + } + + @Test + public void testJpegParser() throws Exception { + + TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class)); + InputStream stream = bc.getBundle().getResource("/test-documents/testJPEG.jpg").openStream(); + + assertNotNull(stream); + + Metadata metadata = new Metadata(); + TikaInputStream tikaStream = TikaInputStream.get(stream); + MediaType type = tikaService.detect(tikaStream, metadata); + + assertEquals("Media Type should be JPEG", MediaType.image("jpeg"), type); + + metadata.add(Metadata.CONTENT_TYPE, type.toString()); + Writer writer = new StringWriter(); + ContentHandler contentHandler = new BodyContentHandler(writer); + ParseContext context = new ParseContext(); + + tikaService.parse(tikaStream, contentHandler, metadata, context); + + assertEquals("Jpg Output Width Should Match", "100", metadata.get(Metadata.IMAGE_WIDTH)); + } + @Test + public void testVideoParser() throws Exception { + TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class)); + InputStream stream = bc.getBundle().getResource("/test-documents/testFLV.flv").openStream(); + + assertNotNull(stream); + + Metadata metadata = new Metadata(); + TikaInputStream tikaStream = TikaInputStream.get(stream); + MediaType type = tikaService.detect(tikaStream, metadata); + + assertEquals("Media Type should be FLV", MediaType.video("x-flv"), type); + + metadata.add(Metadata.CONTENT_TYPE, type.toString()); + Writer writer = new StringWriter(); + ContentHandler contentHandler = new BodyContentHandler(writer); + ParseContext context = new ParseContext(); + + tikaService.parse(tikaStream, contentHandler, metadata, context); + + assertEquals("Video Should have audio", "true", metadata.get("hasAudio")); + + } + + @Test + public void testMp3Parser() throws Exception { + TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class)); + InputStream stream = bc.getBundle().getResource("/test-documents/testMP3i18n.mp3").openStream(); + + assertNotNull(stream); + + Metadata metadata = new Metadata(); + TikaInputStream tikaStream = TikaInputStream.get(stream); + MediaType type = tikaService.detect(tikaStream, metadata); + + assertEquals("Media Type should be MP3", MediaType.audio("mpeg"), type); + + metadata.add(Metadata.CONTENT_TYPE, type.toString()); + Writer writer = new StringWriter(); + ContentHandler contentHandler = new BodyContentHandler(writer); + ParseContext context = new ParseContext(); + + tikaService.parse(tikaStream, contentHandler, metadata, context); + + assertEquals("MP3 should have title", "Une chason en Fran\u00e7ais", metadata.get(TikaCoreProperties.TITLE)); + + } + + @Test + public void testMidiParser() throws Exception { + TikaService tikaService = bc.getService(bc.getServiceReference(TikaService.class)); + InputStream stream = bc.getBundle().getResource("/test-documents/testMID.mid").openStream(); + + assertNotNull(stream); + + Metadata metadata = new Metadata(); + TikaInputStream tikaStream = TikaInputStream.get(stream); + MediaType type = tikaService.detect(tikaStream, metadata); + + assertEquals("Media Type should be Midi", MediaType.audio("midi"), type); + + metadata.add(Metadata.CONTENT_TYPE, type.toString()); + Writer writer = new StringWriter(); + ContentHandler contentHandler = new BodyContentHandler(writer); + ParseContext context = new ParseContext(); + + tikaService.parse(tikaStream, contentHandler, metadata, context); + assertEquals("Midi should have 2 tracks", "2", metadata.get("tracks")); + } + +}
