Author: tallison
Date: Thu Jul 2 13:47:23 2015
New Revision: 1688827
URL: http://svn.apache.org/r1688827
Log:
TIKA-1673 drop source file name from embedded file path; made a few Java 7 updates; added timing for embedded docs
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
tika/trunk/tika-server/pom.xml
Modified: tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Thu Jul 2 13:47:23 2015
@@ -1,4 +1,7 @@
Release 1.10 - Current Development
+ * Drop the source file name from the embedded file path in
+ RecursiveParserWrapper's "X-TIKA:embedded_resource_path"
+ (TIKA-1673).
* Upgraded to Java 7 (TIKA-1536).
Modified: tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java (original)
+++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java Thu Jul 2 13:47:23 2015
@@ -175,7 +175,7 @@ public class TikaCLITest {
int title = json.indexOf("\"title\"");
assertTrue(enc > -1 && fb > -1 && enc < fb);
assertTrue (fb > -1 && title > -1 && fb < title);
- assertTrue(json.contains("\"X-TIKA:digest:MD2\":\"470481522c33aa7f6558dfc5cc0c8135\""));
+ assertTrue(json.contains("\"X-TIKA:digest:MD2\":"));
}
/**
@@ -375,7 +375,7 @@ public class TikaCLITest {
" \"Application-Version\": \"15.0000\",\n" +
" \"Character Count\": \"28\",\n" +
" \"Character-Count-With-Spaces\": \"31\","));
- assertTrue(content.contains("\"X-TIKA:embedded_resource_path\": \"test_recursive_embedded.docx/embed1.zip\""));
+ assertTrue(content.contains("\"X-TIKA:embedded_resource_path\": \"/embed1.zip\""));
assertFalse(content.contains("X-TIKA:content"));
}
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java Thu Jul 2 13:47:23 2015
@@ -93,7 +93,7 @@ public class RecursiveParserWrapper impl
private final Parser wrappedParser;
private final ContentHandlerFactory contentHandlerFactory;
- private final List<Metadata> metadatas = new LinkedList<Metadata>();
+ private final List<Metadata> metadatas = new LinkedList<>();
private final boolean catchEmbeddedExceptions;
@@ -150,8 +150,7 @@ public class RecursiveParserWrapper impl
Metadata metadata, ParseContext context) throws IOException,
SAXException, TikaException {
- String name = getResourceName(metadata);
- EmbeddedParserDecorator decorator = new EmbeddedParserDecorator(name);
+ EmbeddedParserDecorator decorator = new EmbeddedParserDecorator("/");
context.set(Parser.class, decorator);
ContentHandler localHandler =
contentHandlerFactory.getNewContentHandler();
long started = new Date().getTime();
@@ -313,7 +312,7 @@ public class RecursiveParserWrapper impl
Parser preContextParser = context.get(Parser.class);
context.set(Parser.class, new
EmbeddedParserDecorator(objectLocation));
-
+ long started = new Date().getTime();
try {
super.parse(stream, localHandler, metadata, context);
} catch (SAXException e) {
@@ -328,14 +327,7 @@ public class RecursiveParserWrapper impl
throw e;
}
}
- } catch (IOException e) {
- if (catchEmbeddedExceptions) {
- String trace = ExceptionUtils.getStackTrace(e);
- metadata.set(EMBEDDED_EXCEPTION, trace);
- } else {
- throw e;
- }
- } catch (TikaException e) {
+ } catch (IOException|TikaException e) {
if (catchEmbeddedExceptions) {
String trace = ExceptionUtils.getStackTrace(e);
metadata.set(EMBEDDED_EXCEPTION, trace);
@@ -344,6 +336,8 @@ public class RecursiveParserWrapper impl
}
} finally {
context.set(Parser.class, preContextParser);
+ long elapsedMillis = new Date().getTime() - started;
+ metadata.set(PARSE_TIME_MILLIS, Long.toString(elapsedMillis));
}
//Because of recursion, we need
Modified:
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java (original)
+++ tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java Thu Jul 2 13:47:23 2015
@@ -76,7 +76,7 @@ public class TestParsingExample {
assertEquals("Number of embedded documents + 1 for the container document", 12, metadataList.size());
Metadata m = metadataList.get(6);
//this is the location the embed3.txt text file within the outer .docx
- assertEquals("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip/embed3.txt",
+ assertEquals("/embed1.zip/embed2.zip/embed3.zip/embed3.txt",
m.get("X-TIKA:embedded_resource_path"));
//it contains some html encoded content
assertContains("When in the Course", m.get("X-TIKA:content"));
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java Thu Jul 2 13:47:23 2015
@@ -166,17 +166,17 @@ public class RecursiveParserWrapperTest
public void testEmbeddedResourcePath() throws Exception {
Set<String> targets = new HashSet<String>();
- targets.add("test_recursive_embedded.docx/embed1.zip");
- targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip");
- targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip");
- targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip/embed4.zip");
- targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip/embed4.zip/embed4.txt");
- targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip/embed3.txt");
- targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed2a.txt");
- targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed2b.txt");
- targets.add("test_recursive_embedded.docx/embed1.zip/embed1b.txt");
- targets.add("test_recursive_embedded.docx/embed1.zip/embed1a.txt");
- targets.add("test_recursive_embedded.docx/image1.emf");
+ targets.add("/embed1.zip");
+ targets.add("/embed1.zip/embed2.zip");
+ targets.add("/embed1.zip/embed2.zip/embed3.zip");
+ targets.add("/embed1.zip/embed2.zip/embed3.zip/embed4.zip");
+ targets.add("/embed1.zip/embed2.zip/embed3.zip/embed4.zip/embed4.txt");
+ targets.add("/embed1.zip/embed2.zip/embed3.zip/embed3.txt");
+ targets.add("/embed1.zip/embed2.zip/embed2a.txt");
+ targets.add("/embed1.zip/embed2.zip/embed2b.txt");
+ targets.add("/embed1.zip/embed1b.txt");
+ targets.add("/embed1.zip/embed1a.txt");
+ targets.add("/image1.emf");
Metadata metadata = new Metadata();
metadata.set(Metadata.RESOURCE_NAME_KEY,
"test_recursive_embedded.docx");
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java Thu Jul 2 13:47:23 2015
@@ -168,7 +168,7 @@ public class SQLite3ParserTest extends T
assertContains("The quick brown fox",
metadataList.get(4).get(RecursiveParserWrapper.TIKA_CONTENT));
//confirm .doc was added to blob
- assertEquals("testSqlite3b.db/BYTES_COL_0.doc/image1.png",
metadataList.get(1).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_PATH));
+ assertEquals("/BYTES_COL_0.doc/image1.png",
metadataList.get(1).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_PATH));
}
@Test
Modified: tika/trunk/tika-server/pom.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-server/pom.xml?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- tika/trunk/tika-server/pom.xml (original)
+++ tika/trunk/tika-server/pom.xml Thu Jul 2 13:47:23 2015
@@ -255,25 +255,6 @@
</execution>
</executions>
</plugin>
- <plugin>
- <groupId>com.qmino</groupId>
- <artifactId>miredot-maven-plugin</artifactId>
- <version>1.4</version>
- <executions>
- <execution>
- <goals>
- <goal>restdoc</goal>
- </goals>
- </execution>
- </executions>
- <configuration>
- <licence>
- <!-- Miredot license key valid until August 1st, 2016 when we can apply for a new one - http://s.apache.org/oE -->
- UHJvamVjdHxvcmcuYXBhY2hlLnRpa2EudGlrYS1zZXJ2ZXJ8MjAxNi0wOC0wMXx0cnVlI01Dd0NGRklXRzRqRmNTZXNJb2laRElKZVF4RXpieUNTQWhSMHBmTzZCMUdMbDBPQ1B1WmJYQ3NpZElZSCtRPT0=
- </licence>
- <!-- insert other configuration here (optional) -->
- </configuration>
- </plugin>
</plugins>
</build>
<profiles>