Author: jukka
Date: Tue Apr 28 13:13:12 2009
New Revision: 769383
URL: http://svn.apache.org/viewvc?rev=769383&view=rev
Log:
TIKA-219: Split Tika to separate modules
Step 1: Move everything to tika-core.
Added:
lucene/tika/trunk/tika-core/ (with props)
lucene/tika/trunk/tika-core/pom.xml (contents, props changed)
- copied, changed from r769380, lucene/tika/trunk/pom.xml
lucene/tika/trunk/tika-core/src/ (props changed)
- copied from r769380, lucene/tika/trunk/src/
Removed:
lucene/tika/trunk/src/
Modified:
lucene/tika/trunk/pom.xml
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SecureContentHandlerTest.java
Modified: lucene/tika/trunk/pom.xml
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/pom.xml?rev=769383&r1=769382&r2=769383&view=diff
==============================================================================
--- lucene/tika/trunk/pom.xml (original)
+++ lucene/tika/trunk/pom.xml Tue Apr 28 13:13:12 2009
@@ -32,10 +32,11 @@
</parent>
<groupId>org.apache.tika</groupId>
- <artifactId>tika</artifactId>
+ <artifactId>tika-reactor</artifactId>
<version>0.4-SNAPSHOT</version>
+ <packaging>pom</packaging>
- <name>Apache Tika</name>
+ <name>Apache Tika reactor</name>
<!-- Keep on a single line, see http://jira.codehaus.org/browse/MJAR-39 -->
<description>Tika is a toolkit for detecting and extracting metadata and
structured text content from various documents using existing parser
libraries.</description>
@@ -173,271 +174,9 @@
<url>http://svn.apache.org/viewvc/lucene/tika/trunk</url>
</scm>
- <dependencies>
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- <version>2.1</version>
- <optional/>
- </dependency>
- <dependency>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- <version>1.0.4</version>
- </dependency>
- <dependency>
- <groupId>commons-codec</groupId>
- <artifactId>commons-codec</artifactId>
- <version>1.3</version>
- </dependency>
- <dependency>
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- <version>1.4</version>
- </dependency>
- <dependency>
- <groupId>pdfbox</groupId>
- <artifactId>pdfbox</artifactId>
- <version>0.7.3</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi</artifactId>
- <version>3.5-beta5</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi-scratchpad</artifactId>
- <version>3.5-beta5</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi-ooxml</artifactId>
- <version>3.5-beta5</version>
- <exclusions>
- <exclusion>
- <groupId>stax</groupId>
- <artifactId>stax-api</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.geronimo.specs</groupId>
- <artifactId>geronimo-stax-api_1.0_spec</artifactId>
- <version>1.0</version>
- </dependency>
- <dependency>
- <groupId>net.sourceforge.nekohtml</groupId>
- <artifactId>nekohtml</artifactId>
- <version>1.9.9</version>
- </dependency>
- <dependency>
- <groupId>com.ibm.icu</groupId>
- <artifactId>icu4j</artifactId>
- <version>3.8</version>
- </dependency>
- <dependency>
- <groupId>asm</groupId>
- <artifactId>asm</artifactId>
- <version>3.1</version>
- </dependency>
- <dependency>
- <groupId>log4j</groupId>
- <artifactId>log4j</artifactId>
- <version>1.2.14</version>
- </dependency>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>3.8.1</version>
- <type>jar</type>
- <scope>test</scope>
- </dependency>
- </dependencies>
-
- <build>
- <resources>
- <resource>
- <targetPath>org/apache/tika</targetPath>
- <directory>${basedir}/src/main/resources</directory>
- </resource>
- </resources>
- <plugins>
- <plugin>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>2.0.2</version>
- <configuration>
- <source>1.5</source>
- <target>1.5</target>
- </configuration>
- </plugin>
- <plugin>
- <artifactId>maven-resources-plugin</artifactId>
- <version>2.3</version>
- <configuration>
- <encoding>UTF-8</encoding>
- </configuration>
- <executions>
- <execution>
- <id>copy-resources</id>
- <phase>process-resources</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>
- ${project.build.directory}/classes/META-INF
- </outputDirectory>
- <resources>
- <resource>
- <directory>${basedir}</directory>
- <includes>
- <include>README.txt</include>
- <include>NOTICE.txt</include>
- <include>LICENSE.txt</include>
- </includes>
- </resource>
- </resources>
- </configuration>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <artifactId>maven-surefire-plugin</artifactId>
- <version>2.4.3</version>
- </plugin>
- <plugin>
- <artifactId>maven-jar-plugin</artifactId>
- <version>2.2</version>
- <configuration>
- <archive>
- <manifestEntries>
- <Specification-Title>${project.name}</Specification-Title>
- <Specification-Version>${project.version}</Specification-Version>
- <Specification-Vendor>${project.organization.name}</Specification-Vendor>
- <Implementation-Title>${project.name}</Implementation-Title>
- <Implementation-Version>${project.version}</Implementation-Version>
- <Implementation-Vendor>${project.organization.name}</Implementation-Vendor>
- <Implementation-Vendor-Id>org.apache</Implementation-Vendor-Id>
- </manifestEntries>
- <manifest>
- <addClasspath>true</addClasspath>
- <mainClass>org.apache.tika.cli.TikaCLI</mainClass>
- </manifest>
- </archive>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-assembly-plugin</artifactId>
- <version>2.2-beta-2</version>
- <configuration>
- <descriptors>
- <descriptor>src/main/assembly/standalone.xml</descriptor>
- </descriptors>
- <archive>
- <manifest>
- <mainClass>org.apache.tika.cli.TikaCLI</mainClass>
- </manifest>
- </archive>
- </configuration>
- <executions>
- <execution>
- <phase>package</phase>
- <goals>
- <goal>single</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>retrotranslator-maven-plugin</artifactId>
- <version>1.0-alpha-4</version>
- <executions>
- <execution>
- <goals>
- <goal>translate-project</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
-
- <reporting>
- <plugins>
-
- <!-- Produce JavaDoc -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-javadoc-plugin</artifactId>
- <version>2.2</version>
- <configuration>
- <aggregate>true</aggregate>
- <source>1.5</source>
- </configuration>
- </plugin>
-
- <!-- Produce Source cross references -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jxr-plugin</artifactId>
- <version>2.1</version>
- <configuration>
- <aggregate>true</aggregate>
- </configuration>
- </plugin>
-
- <!-- Unit tests report -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-report-plugin</artifactId>
- <version>2.3</version>
- </plugin>
-
- <!-- "Release Audit" report (checks license headers etc.) -->
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>rat-maven-plugin</artifactId>
- <version>1.0-alpha-3</version>
- </plugin>
-
- <!-- FindBugs Report -->
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>findbugs-maven-plugin</artifactId>
- <version>1.0.0</version>
- <configuration>
- <threshold>Normal</threshold>
- <effort>Default</effort>
- </configuration>
- </plugin>
-
- <!-- Checkstyle report -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-checkstyle-plugin</artifactId>
- <version>2.1</version>
- <configuration>
- <!--configLocation>checkstyle.xml</configLocation-->
- <enableRulesSummary>false</enableRulesSummary>
- </configuration>
- </plugin>
-
- <!-- PMD report -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-pmd-plugin</artifactId>
- <configuration>
- <linkXref>true</linkXref>
- <sourceEncoding>ASCII</sourceEncoding>
- <targetJdk>1.5</targetJdk>
- </configuration>
- </plugin>
-
- </plugins>
- </reporting>
+ <modules>
+ <module>tika-core</module>
+ </modules>
</project>
Propchange: lucene/tika/trunk/tika-core/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Apr 28 13:13:12 2009
@@ -0,0 +1,2 @@
+target
+.*
Copied: lucene/tika/trunk/tika-core/pom.xml (from r769380, lucene/tika/trunk/pom.xml)
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/pom.xml?p2=lucene/tika/trunk/tika-core/pom.xml&p1=lucene/tika/trunk/pom.xml&r1=769380&r2=769383&rev=769383&view=diff
==============================================================================
--- lucene/tika/trunk/pom.xml (original)
+++ lucene/tika/trunk/tika-core/pom.xml Tue Apr 28 13:13:12 2009
@@ -32,147 +32,13 @@
</parent>
<groupId>org.apache.tika</groupId>
- <artifactId>tika</artifactId>
+ <artifactId>tika-core</artifactId>
<version>0.4-SNAPSHOT</version>
- <name>Apache Tika</name>
+ <name>Apache Tika core</name>
<!-- Keep on a single line, see http://jira.codehaus.org/browse/MJAR-39 -->
<description>Tika is a toolkit for detecting and extracting metadata and
structured text content from various documents using existing parser
libraries.</description>
- <url>http://lucene.apache.org/tika/</url>
-
- <issueManagement>
- <system>JIRA</system>
- <url>https://issues.apache.org/jira/browse/TIKA</url>
- </issueManagement>
-
- <mailingLists>
- <mailingList>
- <name>Development mailing list</name>
- <subscribe>[email protected]</subscribe>
- <unsubscribe>[email protected]</unsubscribe>
- <post>[email protected]</post>
- <archive>http://mail-archives.apache.org/mod_mbox/lucene-tika-dev/</archive>
- <otherArchives>
- <otherArchive>http://www.mail-archive.com/[email protected]/</otherArchive>
- <otherArchive>http://www.mail-archive.com/[email protected]/</otherArchive>
- <otherArchive>http://www.nabble.com/Apache-Tika---Development-f20913.html</otherArchive>
- <otherArchive>http://news.gmane.org/gmane.comp.apache.tika.devel</otherArchive>
- <otherArchive>http://tika.markmail.org/</otherArchive>
- </otherArchives>
- </mailingList>
- <mailingList>
- <name>Commit mailing list</name>
- <subscribe>[email protected]</subscribe>
- <unsubscribe>[email protected]</unsubscribe>
- <post>[email protected]</post>
- <archive>http://mail-archives.apache.org/mod_mbox/lucene-tika-commits/</archive>
- <otherArchives>
- <otherArchive>http://www.mail-archive.com/[email protected]/</otherArchive>
- <otherArchive>http://www.mail-archive.com/[email protected]/</otherArchive>
- </otherArchives>
- </mailingList>
- <mailingList>
- <name>User mailing list</name>
- <subscribe>[email protected]</subscribe>
- <unsubscribe>[email protected]</unsubscribe>
- <post>[email protected]</post>
- <archive>http://mail-archives.apache.org/mod_mbox/lucene-tika-user/</archive>
- <otherArchives>
- <otherArchive>http://www.mail-archive.com/[email protected]/</otherArchive>
- </otherArchives>
- </mailingList>
- </mailingLists>
-
- <developers>
- <developer>
- <name>Rida Benjelloun</name>
- <id>ridabenjelloun</id>
- <email>[email protected]</email>
- <roles>
- <role>committer</role>
- </roles>
- </developer>
- <developer>
- <name>Keith Bennett</name>
- <id>kbennett</id>
- <roles>
- <role>committer</role>
- </roles>
- </developer>
- <developer>
- <name>Mark Harwood</name>
- <id>mharwood</id>
- <roles>
- <role>committer</role>
- </roles>
- </developer>
- <developer>
- <name>Chris A. Mattmann</name>
- <id>mattmann</id>
- <email>[email protected]</email>
- <url>http://people.apache.org/~mattmann/</url>
- <organization>NASA Jet Propulsion Laboratory</organization>
- <organizationUrl>http://www.jpl.nasa.gov</organizationUrl>
- <timezone>-8</timezone>
- <properties/>
- <roles>
- <role>committer</role>
- </roles>
- </developer>
- <developer>
- <name>Dave Meikle</name>
- <id>dmeikle</id>
- <roles>
- <role>committer</role>
- </roles>
- </developer>
- <developer>
- <name>Sami Siren</name>
- <id>siren</id>
- <roles>
- <role>committer</role>
- </roles>
- </developer>
- <developer>
- <name>Jukka Zitting</name>
- <id>jukka</id>
- <roles>
- <role>committer</role>
- </roles>
- </developer>
- </developers>
- <contributors>
- <contributor>
- <name>Doug Cutting</name>
- <roles>
- <role>mentor</role>
- </roles>
- </contributor>
- <contributor>
- <name>Bertrand Delacretaz</name>
- <roles>
- <role>mentor</role>
- </roles>
- </contributor>
- <contributor>
- <name>Niall Pemberton</name>
- <roles>
- <role>emeritus</role>
- </roles>
- </contributor>
- </contributors>
-
- <scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/tika/trunk
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/tika/trunk
- </developerConnection>
- <url>http://svn.apache.org/viewvc/lucene/tika/trunk</url>
- </scm>
-
<dependencies>
<dependency>
<groupId>commons-lang</groupId>
Propchange: lucene/tika/trunk/tika-core/pom.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/tika/trunk/tika-core/pom.xml
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-core/src/
------------------------------------------------------------------------------
svn:mergeinfo =
Modified: lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SecureContentHandlerTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SecureContentHandlerTest.java?rev=769383&r1=769380&r2=769383&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SecureContentHandlerTest.java (original)
+++ lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SecureContentHandlerTest.java Tue Apr 28 13:13:12 2009
@@ -1,116 +1,116 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import java.io.IOException;
-
-import org.apache.commons.io.input.CountingInputStream;
-import org.apache.commons.io.input.NullInputStream;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-import junit.framework.TestCase;
-
-/**
- * Tests for the {@link SecureContentHandler} class.
- */
-public class SecureContentHandlerTest extends TestCase {
-
- private static final int MANY_BYTES = 2000000;
-
- private CountingInputStream stream;
-
- private SecureContentHandler handler;
-
- protected void setUp() {
- stream = new CountingInputStream(new NullInputStream(MANY_BYTES));
- handler = new SecureContentHandler(new DefaultHandler(), stream);
- }
-
- public void testZeroCharactersPerByte() throws IOException {
- try {
- char[] ch = new char[] { 'x' };
- for (int i = 0; i < MANY_BYTES; i++) {
- stream.read();
- }
- handler.characters(ch, 0, 1);
- } catch (SAXException e) {
- fail("Unexpected SAXException");
- }
- }
-
- public void testOneCharacterPerByte() throws IOException {
- try {
- char[] ch = new char[1];
- for (int i = 0; i < MANY_BYTES; i++) {
- stream.read();
- handler.characters(ch, 0, ch.length);
- }
- } catch (SAXException e) {
- fail("Unexpected SAXException");
- }
- }
-
- public void testTenCharactersPerByte() throws IOException {
- try {
- char[] ch = new char[10];
- for (int i = 0; i < MANY_BYTES; i++) {
- stream.read();
- handler.characters(ch, 0, ch.length);
- }
- } catch (SAXException e) {
- fail("Unexpected SAXException");
- }
- }
-
- public void testManyCharactersPerByte() throws IOException {
- try {
- char[] ch = new char[1000];
- for (int i = 0; i < MANY_BYTES; i++) {
- stream.read();
- handler.characters(ch, 0, ch.length);
- }
- fail("Expected SAXException not thrown");
- } catch (SAXException e) {
- // expected
- }
- }
-
- public void testSomeCharactersWithoutInput() throws IOException {
- try {
- char[] ch = new char[100];
- for (int i = 0; i < 100; i++) {
- handler.characters(ch, 0, ch.length);
- }
- } catch (SAXException e) {
- fail("Unexpected SAXException");
- }
- }
-
- public void testManyCharactersWithoutInput() throws IOException {
- try {
- char[] ch = new char[100];
- for (int i = 0; i < 20000; i++) {
- handler.characters(ch, 0, ch.length);
- }
- fail("Expected SAXException not thrown");
- } catch (SAXException e) {
- // expected
- }
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.IOException;
+
+import org.apache.commons.io.input.CountingInputStream;
+import org.apache.commons.io.input.NullInputStream;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for the {@link SecureContentHandler} class.
+ */
+public class SecureContentHandlerTest extends TestCase {
+
+ private static final int MANY_BYTES = 2000000;
+
+ private CountingInputStream stream;
+
+ private SecureContentHandler handler;
+
+ protected void setUp() {
+ stream = new CountingInputStream(new NullInputStream(MANY_BYTES));
+ handler = new SecureContentHandler(new DefaultHandler(), stream);
+ }
+
+ public void testZeroCharactersPerByte() throws IOException {
+ try {
+ char[] ch = new char[] { 'x' };
+ for (int i = 0; i < MANY_BYTES; i++) {
+ stream.read();
+ }
+ handler.characters(ch, 0, 1);
+ } catch (SAXException e) {
+ fail("Unexpected SAXException");
+ }
+ }
+
+ public void testOneCharacterPerByte() throws IOException {
+ try {
+ char[] ch = new char[1];
+ for (int i = 0; i < MANY_BYTES; i++) {
+ stream.read();
+ handler.characters(ch, 0, ch.length);
+ }
+ } catch (SAXException e) {
+ fail("Unexpected SAXException");
+ }
+ }
+
+ public void testTenCharactersPerByte() throws IOException {
+ try {
+ char[] ch = new char[10];
+ for (int i = 0; i < MANY_BYTES; i++) {
+ stream.read();
+ handler.characters(ch, 0, ch.length);
+ }
+ } catch (SAXException e) {
+ fail("Unexpected SAXException");
+ }
+ }
+
+ public void testManyCharactersPerByte() throws IOException {
+ try {
+ char[] ch = new char[1000];
+ for (int i = 0; i < MANY_BYTES; i++) {
+ stream.read();
+ handler.characters(ch, 0, ch.length);
+ }
+ fail("Expected SAXException not thrown");
+ } catch (SAXException e) {
+ // expected
+ }
+ }
+
+ public void testSomeCharactersWithoutInput() throws IOException {
+ try {
+ char[] ch = new char[100];
+ for (int i = 0; i < 100; i++) {
+ handler.characters(ch, 0, ch.length);
+ }
+ } catch (SAXException e) {
+ fail("Unexpected SAXException");
+ }
+ }
+
+ public void testManyCharactersWithoutInput() throws IOException {
+ try {
+ char[] ch = new char[100];
+ for (int i = 0; i < 20000; i++) {
+ handler.characters(ch, 0, ch.length);
+ }
+ fail("Expected SAXException not thrown");
+ } catch (SAXException e) {
+ // expected
+ }
+ }
+
+}