Author: jukka
Date: Fri Oct 16 16:03:42 2009
New Revision: 825972
URL: http://svn.apache.org/viewvc?rev=825972&view=rev
Log:
TIKA-300: rename openoffice.. parser classes to odf..
Added:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/
- copied from r825810,
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
- copied, changed from r825810,
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
- copied, changed from r825810,
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeMetaParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
- copied, changed from r825810,
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
Removed:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenOfficeContentParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenOfficeMetaParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenOfficeParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/NSNormalizerContentHandler.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeMetaParser.java
Modified:
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
Modified:
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml?rev=825972&r1=825971&r2=825972&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
(original)
+++
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
Fri Oct 16 16:03:42 2009
@@ -75,7 +75,7 @@
<mime>text/plain</mime>
</parser>
- <parser name="parse-openoffice"
class="org.apache.tika.parser.opendocument.OpenOfficeParser">
+ <parser name="parse-odf"
class="org.apache.tika.parser.odf.OpenDocumentParser">
<mime>application/vnd.sun.xml.writer</mime>
<mime>application/vnd.oasis.opendocument.text</mime>
<mime>application/vnd.oasis.opendocument.graphics</mime>
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java?rev=825972&r1=825810&r2=825972&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
Fri Oct 16 16:03:42 2009
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.opendocument;
+package org.apache.tika.parser.odf;
import java.io.IOException;
import java.io.StringReader;
Copied:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
(from r825810,
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java)
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java?p2=lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java&p1=lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java&r1=825810&r2=825972&rev=825972&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeContentParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentContentParser.java
Fri Oct 16 16:03:42 2009
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.opendocument;
+package org.apache.tika.parser.odf;
import java.io.IOException;
import java.io.InputStream;
@@ -48,7 +48,7 @@
/**
* Parser for OpenDocument <code>content.xml</code> files.
*/
-public class OpenOfficeContentParser implements Parser {
+public class OpenDocumentContentParser implements Parser {
public static final String TEXT_NS =
"urn:oasis:names:tc:opendocument:xmlns:text:1.0";
@@ -56,6 +56,9 @@
public static final String TABLE_NS =
"urn:oasis:names:tc:opendocument:xmlns:table:1.0";
+ public static final String OFFICE_NS =
+ "urn:oasis:names:tc:opendocument:xmlns:office:1.0";
+
public static final String XLINK_NS = "http://www.w3.org/1999/xlink";
protected static final char[] TAB = new char[] { '\t' };
@@ -85,6 +88,9 @@
new QName(TEXT_NS, "note"),
new TargetElement(XHTML, "div"));
MAPPINGS.put(
+ new QName(OFFICE_NS, "annotation"),
+ new TargetElement(XHTML, "div"));
+ MAPPINGS.put(
new QName(TEXT_NS, "span"),
new TargetElement(XHTML, "span"));
MAPPINGS.put(
Copied:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
(from r825810,
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeMetaParser.java)
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java?p2=lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java&p1=lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeMetaParser.java&r1=825810&r2=825972&rev=825972&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeMetaParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
Fri Oct 16 16:03:42 2009
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.opendocument;
+package org.apache.tika.parser.odf;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.xml.DcXMLParser;
@@ -29,7 +29,7 @@
/**
* Parser for OpenDocument <code>meta.xml</code> files.
*/
-public class OpenOfficeMetaParser extends DcXMLParser {
+public class OpenDocumentMetaParser extends DcXMLParser {
private static final XPathParser META_XPATH = new XPathParser(
"meta", "urn:oasis:names:tc:opendocument:xmlns:meta:1.0");
Copied:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
(from r825810,
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java)
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java?p2=lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java&p1=lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java&r1=825810&r2=825972&rev=825972&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
Fri Oct 16 16:03:42 2009
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.tika.parser.opendocument;
+package org.apache.tika.parser.odf;
import java.io.IOException;
import java.io.InputStream;
@@ -34,11 +34,11 @@
/**
* OpenOffice parser
*/
-public class OpenOfficeParser implements Parser {
+public class OpenDocumentParser implements Parser {
- private Parser meta = new OpenOfficeMetaParser();
+ private Parser meta = new OpenDocumentMetaParser();
- private Parser content = new OpenOfficeContentParser();
+ private Parser content = new OpenDocumentContentParser();
public Parser getMetaParser() {
return meta;
@@ -68,7 +68,7 @@
metadata.set(Metadata.CONTENT_TYPE, type);
} else if (entry.getName().equals("meta.xml")) {
meta.parse(zip, new DefaultHandler(), metadata, context);
- } else if (entry.getName().equals("content.xml")) {
+ } else if (entry.getName().endsWith("content.xml")) {
content.parse(zip, handler, metadata, context);
}
entry = zip.getNextEntry();
Modified:
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java?rev=825972&r1=825971&r2=825972&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
Fri Oct 16 16:03:42 2009
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -16,73 +16,13 @@
*/
package org.apache.tika.parser.opendocument;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Collections;
-import java.util.Map;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipInputStream;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.Parser;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
+import org.apache.tika.parser.odf.OpenDocumentParser;
/**
* OpenOffice parser
+ *
+ * @deprecated Use the {@link OpenDocumentParser} class instead.
+ * This class will be removed in Apache Tika 1.0.
*/
-public class OpenOfficeParser implements Parser {
-
- private Parser meta = new OpenOfficeMetaParser();
-
- private Parser content = new OpenOfficeContentParser();
-
- public Parser getMetaParser() {
- return meta;
- }
-
- public void setMetaParser(Parser meta) {
- this.meta = meta;
- }
-
- public Parser getContentParser() {
- return content;
- }
-
- public void setContentParser(Parser content) {
- this.content = content;
- }
-
- public void parse(
- InputStream stream, ContentHandler handler,
- Metadata metadata, Map<String, Object> context)
- throws IOException, SAXException, TikaException {
- ZipInputStream zip = new ZipInputStream(stream);
- ZipEntry entry = zip.getNextEntry();
- while (entry != null) {
- if (entry.getName().equals("mimetype")) {
- String type = IOUtils.toString(zip, "UTF-8");
- metadata.set(Metadata.CONTENT_TYPE, type);
- } else if (entry.getName().equals("meta.xml")) {
- meta.parse(zip, new DefaultHandler(), metadata, context);
- } else if (entry.getName().endsWith("content.xml")) {
- content.parse(zip, handler, metadata, context);
- }
- entry = zip.getNextEntry();
- }
- }
-
- /**
- * @deprecated This method will be removed in Apache Tika 1.0.
- */
- public void parse(
- InputStream stream, ContentHandler handler, Metadata metadata)
- throws IOException, SAXException, TikaException {
- Map<String, Object> context = Collections.emptyMap();
- parse(stream, handler, metadata, context);
- }
-
+public class OpenOfficeParser extends OpenDocumentParser {
}