Author: rwesten
Date: Wed Apr 23 06:09:28 2014
New Revision: 1589341
URL: http://svn.apache.org/r1589341
Log:
STANBOL-1171: Updates Apache Tika to 1.5: Had to create a custom tika-parser
bundle (replacing the default org.apache.tika:tika-bundle:1.5). The bundle was
added to commons/tikabundle. This is hopefully only a temporary solution, as the
encountered issues will be reported as issues for Apache Tika.
Added:
stanbol/branches/release-0.12/commons/tikabundle/
stanbol/branches/release-0.12/commons/tikabundle/pom.xml
stanbol/branches/release-0.12/commons/tikabundle/tikabundle.mdtext
Modified:
stanbol/branches/release-0.12/commons/pom.xml
stanbol/branches/release-0.12/enhancement-engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
stanbol/branches/release-0.12/launchers/bundlelists/enhancer/src/main/bundles/list.xml
stanbol/branches/release-0.12/parent/pom.xml
Modified: stanbol/branches/release-0.12/commons/pom.xml
URL:
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/commons/pom.xml?rev=1589341&r1=1589340&r2=1589341&view=diff
==============================================================================
--- stanbol/branches/release-0.12/commons/pom.xml (original)
+++ stanbol/branches/release-0.12/commons/pom.xml Wed Apr 23 06:09:28 2014
@@ -96,6 +96,9 @@
<module>solr/extras/icu</module> <!-- support for ICU -->
<module>solr/extras/stempel</module> <!-- support for the Polish stemmer
-->
+ <!-- Apache Tika Bundle-->
+ <module>tikabundle</module>
+
<module>jobs</module> <!-- Stanbol Background Jobs Framework -->
<module>opennlp</module> <!-- Allows to load OpenNLP modles via DataFile
provider -->
Added: stanbol/branches/release-0.12/commons/tikabundle/pom.xml
URL:
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/commons/tikabundle/pom.xml?rev=1589341&view=auto
==============================================================================
--- stanbol/branches/release-0.12/commons/tikabundle/pom.xml (added)
+++ stanbol/branches/release-0.12/commons/tikabundle/pom.xml Wed Apr 23
06:09:28 2014
@@ -0,0 +1,187 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>stanbol-parent</artifactId>
+ <version>4.1-SNAPSHOT</version>
+ <relativePath>../../parent</relativePath>
+ </parent>
+
+ <artifactId>org.apache.stanbol.commons.tikabundle</artifactId>
+ <version>0.12.1-SNAPSHOT</version>
+ <packaging>bundle</packaging>
+
+ <name>Apache Stanbol Commons Tika Bundle</name>
+ <description>
+ Alternative Bundle for the Tika Parsers
+ </description>
+
+ <inceptionYear>2014</inceptionYear>
+ <licenses>
+ <license>
+ <name>Apache Software License, Version 2.0</name>
+ <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
+ <distribution>repo</distribution>
+ <comments>A business-friendly OSS license</comments>
+ </license>
+ </licenses>
+
+ <scm> <!-- SCM location of this module (commons/tikabundle) on the release-0.12 branch -->
+ <connection>
+
scm:svn:http://svn.apache.org/repos/asf/stanbol/branches/release-0.12/commons/tikabundle
+ </connection>
+ <developerConnection>
+
scm:svn:https://svn.apache.org/repos/asf/stanbol/branches/release-0.12/commons/tikabundle
+ </developerConnection>
+ <url>http://stanbol.apache.org</url>
+ </scm>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.felix</groupId>
+ <artifactId>maven-bundle-plugin</artifactId>
+ <extensions>true</extensions>
+ <configuration>
+ <instructions>
+ <Bundle-Activator>
+ org.apache.tika.parser.internal.Activator
+ </Bundle-Activator>
+ <Embed-Dependency>
+ tika-parsers;inline=true,
+ pdfbox,fontbox,jempbox,bcmail-jdk15,bcprov-jdk15,
+ poi,poi-scratchpad,poi-ooxml,poi-ooxml-schemas,
+ xmlbeans, dom4j,
+ tagsoup,
+ asm-debug-all,
+ juniversalchardet,
+ vorbis-java-core, vorbis-java-tika,
+ isoparser, aspectjrt,
+ metadata-extractor,
+ boilerpipe, rome,
+ apache-mime4j-core, apache-mime4j-dom,
+ jhighlight, netcdf, xmpcore
+ </Embed-Dependency>
+ <Embed-Transitive>true</Embed-Transitive>
+ <Bundle-DocURL>${project.url}</Bundle-DocURL>
+ <Export-Package>
+ !org.apache.tika.parser,
+ !org.apache.tika.parser.external,
+ org.apache.tika.parser.*; version=${tika-version}
+ </Export-Package>
+ <Import-Package>
+ *,
+ com.adobe.xmp;resolution:=optional,
+ com.adobe.xmp.properties;resolution:=optional,
+ com.google.protobuf;resolution:=optional,
+ com.ibm.icu.text;resolution:=optional,
+ com.sleepycat.je;resolution:=optional,
+ com.sun.javadoc;resolution:=optional,
+ com.sun.msv.datatype;resolution:=optional,
+ com.sun.msv.datatype.xsd;resolution:=optional,
+ com.sun.tools.javadoc;resolution:=optional,
+ edu.wisc.ssec.mcidas;resolution:=optional,
+ edu.wisc.ssec.mcidas.adde;resolution:=optional,
+ javax.activation;resolution:=optional,
+ javax.mail;resolution:=optional,
+ javax.mail.internet;resolution:=optional,
+ javax.xml.bind;resolution:=optional,
+ javax.xml.stream;version="[1.0,2)";resolution:=optional,
+ javax.xml.stream.events;version="[1.0,2)";resolution:=optional,
+ javax.xml.stream.util;version="[1.0,2)";resolution:=optional,
+ junit.framework;resolution:=optional,
+ junit.textui;resolution:=optional,
+ net.sf.ehcache;resolution:=optional,
+ nu.xom;resolution:=optional,
+ opendap.dap;resolution:=optional,
+ opendap.dap.parser;resolution:=optional,
+ org.apache.commons.httpclient;resolution:=optional,
+ org.apache.commons.httpclient.auth;resolution:=optional,
+ org.apache.commons.httpclient.methods;resolution:=optional,
+ org.apache.commons.httpclient.params;resolution:=optional,
+ org.apache.commons.httpclient.protocol;resolution:=optional,
+ org.apache.crimson.jaxp;resolution:=optional,
+ org.apache.tools.ant;resolution:=optional,
+ org.apache.tools.ant.taskdefs;resolution:=optional,
+ org.apache.tools.ant.types;resolution:=optional,
+ org.apache.xerces.parsers;resolution:=optional,
+ org.apache.xerces.util;resolution:=optional,
+ org.apache.xerces.xni;resolution:=optional,
+ org.apache.xerces.xni.parser;resolution:=optional,
+ org.apache.xml.resolver;resolution:=optional,
+ org.apache.xml.resolver.tools;resolution:=optional,
+ org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional,
+ org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional,
+ org.cyberneko.html.xercesbridge;resolution:=optional,
+ org.gjt.xpp;resolution:=optional,
+ org.jaxen;resolution:=optional,
+ org.jaxen.dom4j;resolution:=optional,
+ org.jaxen.pattern;resolution:=optional,
+ org.jaxen.saxpath;resolution:=optional,
+ org.jdom;resolution:=optional,
+ org.jdom.input;resolution:=optional,
+ org.jdom.output;resolution:=optional,
+
org.openxmlformats.schemas.officeDocument.x2006.math;resolution:=optional,
+
org.openxmlformats.schemas.schemaLibrary.x2006.main;resolution:=optional,
+ org.osgi.framework;resolution:=optional,
+ org.w3c.dom;resolution:=optional,
+ org.relaxng.datatype;resolution:=optional,
+ org.xml.sax;resolution:=optional,
+ org.xml.sax.ext;resolution:=optional,
+ org.xml.sax.helpers;resolution:=optional,
+ org.xmlpull.v1;resolution:=optional,
+ schemasMicrosoftComOfficePowerpoint;resolution:=optional,
+ schemasMicrosoftComOfficeWord;resolution:=optional,
+ ucar.grib;resolution:=optional,
+ ucar.grib.grib1;resolution:=optional,
+ ucar.grib.grib2;resolution:=optional,
+ ucar.grid;resolution:=optional,
+ visad;resolution:=optional,
+ visad.data;resolution:=optional,
+ visad.data.vis5d;resolution:=optional,
+ visad.jmet;resolution:=optional,
+ visad.util;resolution:=optional
+ </Import-Package>
+ </instructions>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parsers</artifactId>
+ <scope>provided</scope>
+ </dependency>
+ <dependency> <!-- avoids ClassNotFoundException:
com.adobe.xmp.XMPException not found -->
+ <groupId>com.adobe.xmp</groupId>
+ <artifactId>xmpcore</artifactId>
+ <version>5.1.2</version>
+ </dependency>
+ </dependencies>
+
+</project>
Added: stanbol/branches/release-0.12/commons/tikabundle/tikabundle.mdtext
URL:
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/commons/tikabundle/tikabundle.mdtext?rev=1589341&view=auto
==============================================================================
--- stanbol/branches/release-0.12/commons/tikabundle/tikabundle.mdtext (added)
+++ stanbol/branches/release-0.12/commons/tikabundle/tikabundle.mdtext Wed Apr
23 06:09:28 2014
@@ -0,0 +1,14 @@
+This is an alternative OSGi bundle for the [Apache
Tika](http://tika.apache.org/)
+parsers.
+
+This replaces
+
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-bundle</artifactId>
+ <version>${tika-version}</version>
+ </dependency>
+
+`${tika-version}` is set to `1.5` for this release.
+
+__TODO:__ add the Apache Tika Jira issue that covers the problems fixed by this
bundle
\ No newline at end of file
Modified:
stanbol/branches/release-0.12/enhancement-engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java?rev=1589341&r1=1589340&r2=1589341&view=diff
==============================================================================
---
stanbol/branches/release-0.12/enhancement-engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
(original)
+++
stanbol/branches/release-0.12/enhancement-engines/tika/src/test/java/org/apache/stanbol/enhancer/engines/tika/TikaEngineTest.java
Wed Apr 23 06:09:28 2014
@@ -366,7 +366,7 @@ public class TikaEngineTest {
new UriRef(NamespaceEnum.media+"Track"),
new UriRef(NamespaceEnum.media+"AudioTrack"));
//properties
- verifyValue(ci, audioTrack, new
UriRef(NamespaceEnum.media+"hasFormat"), XSD.string, "Stereo");
+ verifyValue(ci, audioTrack, new
UriRef(NamespaceEnum.media+"hasFormat"), XSD.string, "Mono");
verifyValue(ci, audioTrack, new
UriRef(NamespaceEnum.media+"samplingRate"), XSD.int_, "44100");
verifyValue(ci, audioTrack, new
UriRef(NamespaceEnum.media+"hasCompression"), XSD.string, "MP3");
}
Modified:
stanbol/branches/release-0.12/launchers/bundlelists/enhancer/src/main/bundles/list.xml
URL:
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/launchers/bundlelists/enhancer/src/main/bundles/list.xml?rev=1589341&r1=1589340&r2=1589341&view=diff
==============================================================================
---
stanbol/branches/release-0.12/launchers/bundlelists/enhancer/src/main/bundles/list.xml
(original)
+++
stanbol/branches/release-0.12/launchers/bundlelists/enhancer/src/main/bundles/list.xml
Wed Apr 23 06:09:28 2014
@@ -38,14 +38,14 @@
<bundle> <!-- Apache Tika core (required by the LangId and TikaEngine) -->
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
- <version>1.2</version>
+ <version>1.5</version>
</bundle>
</startLevel>
<startLevel level="27">
- <bundle> <!-- Apache Tika bundle (required by the TikaEngine) -->
- <groupId>org.apache.tika</groupId>
- <artifactId>tika-bundle</artifactId>
- <version>1.2</version>
+ <bundle> <!-- Stanbol version of the Apache Tika bundle -->
+ <groupId>org.apache.stanbol</groupId>
+ <artifactId>org.apache.stanbol.commons.tikabundle</artifactId>
+ <version>0.12.1-SNAPSHOT</version>
</bundle>
</startLevel>
<!-- Stanbol Enhancer infrastructure and required libraries -->
Modified: stanbol/branches/release-0.12/parent/pom.xml
URL:
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/parent/pom.xml?rev=1589341&r1=1589340&r2=1589341&view=diff
==============================================================================
--- stanbol/branches/release-0.12/parent/pom.xml (original)
+++ stanbol/branches/release-0.12/parent/pom.xml Wed Apr 23 06:09:28 2014
@@ -65,6 +65,7 @@
<sesame-version>2.7.7</sesame-version>
<marmotta-version>3.2.0-SNAPSHOT</marmotta-version>
<ldpath-version>${marmotta-version}</ldpath-version>
+ <tika-version>1.5</tika-version>
<sourceReleaseAssemblyDescriptor>stanbol-source-release-zip-tar</sourceReleaseAssemblyDescriptor>
</properties>
@@ -1329,17 +1330,17 @@
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
- <version>1.2</version>
+ <version>${tika-version}</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
- <version>1.2</version>
+ <version>${tika-version}</version>
</dependency>
<!-- dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-xmp</artifactId>
- <version>1.4</version>
+ <version>${tika-version}</version>
</dependency -->
<!-- Aperture -->
<dependency>