Author: jukka
Date: Wed Nov 7 12:52:44 2012
New Revision: 1406597
URL: http://svn.apache.org/viewvc?rev=1406597&view=rev
Log:
TIKA-799: ForkParser does not populate metadata object after completing a parse
Get the metadata from the XHTML head
Added:
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/MetadataContentHandler.java
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
tika/trunk/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
tika/trunk/tika-core/src/test/java/org/apache/tika/fork/ForkTestParser.java
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java?rev=1406597&r1=1406596&r2=1406597&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java
(original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/fork/ForkParser.java Wed
Nov 7 12:52:44 2012
@@ -29,6 +29,7 @@ import org.apache.tika.parser.AbstractPa
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.TeeContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -128,7 +129,9 @@ public class ForkParser extends Abstract
boolean alive = false;
ForkClient client = acquireClient();
try {
- t = client.call("parse", stream, handler, metadata, context);
+ ContentHandler tee = new TeeContentHandler(
+ handler, new MetadataContentHandler(metadata));
+ t = client.call("parse", stream, tee, metadata, context);
alive = true;
} catch (TikaException te) {
// Problem occurred on our side
Added:
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/MetadataContentHandler.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/fork/MetadataContentHandler.java?rev=1406597&view=auto
==============================================================================
---
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/MetadataContentHandler.java
(added)
+++
tika/trunk/tika-core/src/main/java/org/apache/tika/fork/MetadataContentHandler.java
Wed Nov 7 12:52:44 2012
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fork;
+
+import org.apache.tika.metadata.Metadata;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * SAX handler that copies {@code <meta name="..." content="..."/>} elements
+ * seen in the event stream into a {@link Metadata} object. All other
+ * events are ignored.
+ */
+class MetadataContentHandler extends DefaultHandler {
+
+    /** Target that receives one entry per usable meta element. */
+    private final Metadata metadata;
+
+    /**
+     * @param metadata object to which collected name/content pairs are added
+     */
+    public MetadataContentHandler(Metadata metadata) {
+        this.metadata = metadata;
+    }
+
+    /**
+     * Adds the {@code name}/{@code content} attribute pair of every element
+     * whose local name is {@code meta} to the metadata object. Elements
+     * lacking either attribute are skipped.
+     *
+     * @param uri namespace URI of the element (not inspected)
+     * @param local local name of the element
+     * @param name qualified name of the element (not inspected)
+     * @param attributes attributes attached to the element
+     * @throws SAXException declared by the SAX contract; not thrown here
+     */
+    @Override
+    public void startElement(
+            String uri, String local, String name, Attributes attributes)
+            throws SAXException {
+        if ("meta".equals(local)) {
+            String aname = attributes.getValue("name");
+            String content = attributes.getValue("content");
+            // getValue returns null for a missing attribute; skip such
+            // elements rather than recording a null key or null value.
+            if (aname != null && content != null) {
+                metadata.add(aname, content);
+            }
+        }
+    }
+
+}
Modified:
tika/trunk/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java?rev=1406597&r1=1406596&r2=1406597&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
(original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/fork/ForkParserTest.java
Wed Nov 7 12:52:44 2012
@@ -38,11 +38,13 @@ public class ForkParserTest extends Test
ForkParserTest.class.getClassLoader(),
new ForkTestParser());
try {
+ Metadata metadata = new Metadata();
ContentHandler output = new BodyContentHandler();
InputStream stream = new ByteArrayInputStream(new byte[0]);
ParseContext context = new ParseContext();
- parser.parse(stream, output, new Metadata(), context);
+ parser.parse(stream, output, metadata, context);
assertEquals("Hello, World!", output.toString().trim());
+ assertEquals("text/plain", metadata.get(Metadata.CONTENT_TYPE));
} finally {
parser.close();
}
Modified:
tika/trunk/tika-core/src/test/java/org/apache/tika/fork/ForkTestParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/fork/ForkTestParser.java?rev=1406597&r1=1406596&r2=1406597&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/fork/ForkTestParser.java
(original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/fork/ForkTestParser.java
Wed Nov 7 12:52:44 2012
@@ -45,6 +45,8 @@ class ForkTestParser extends AbstractPar
throws IOException, SAXException, TikaException {
stream.read();
+ metadata.set(Metadata.CONTENT_TYPE, "text/plain");
+
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
char[] ch = "Hello, World!".toCharArray();