Author: tallison Date: Thu Jul 2 13:47:23 2015 New Revision: 1688827 URL: http://svn.apache.org/r1688827 Log: TIKA-1673: dropped the source file name from the embedded file path; made a few Java 7 updates; added timing for embedded docs
Modified: tika/trunk/CHANGES.txt tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java tika/trunk/tika-server/pom.xml Modified: tika/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1688827&r1=1688826&r2=1688827&view=diff ============================================================================== --- tika/trunk/CHANGES.txt (original) +++ tika/trunk/CHANGES.txt Thu Jul 2 13:47:23 2015 @@ -1,4 +1,7 @@ Release 1.10 - Current Development + * Drop the source file name from the embedded file path in + RecursiveParserWrapper's "X-TIKA:embedded_resource_path" + (TIKA-1673). * Upgraded to Java 7 (TIKA-1536). 
Modified: tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java?rev=1688827&r1=1688826&r2=1688827&view=diff ============================================================================== --- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java (original) +++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java Thu Jul 2 13:47:23 2015 @@ -175,7 +175,7 @@ public class TikaCLITest { int title = json.indexOf("\"title\""); assertTrue(enc > -1 && fb > -1 && enc < fb); assertTrue (fb > -1 && title > -1 && fb < title); - assertTrue(json.contains("\"X-TIKA:digest:MD2\":\"470481522c33aa7f6558dfc5cc0c8135\"")); + assertTrue(json.contains("\"X-TIKA:digest:MD2\":")); } /** @@ -375,7 +375,7 @@ public class TikaCLITest { " \"Application-Version\": \"15.0000\",\n" + " \"Character Count\": \"28\",\n" + " \"Character-Count-With-Spaces\": \"31\",")); - assertTrue(content.contains("\"X-TIKA:embedded_resource_path\": \"test_recursive_embedded.docx/embed1.zip\"")); + assertTrue(content.contains("\"X-TIKA:embedded_resource_path\": \"/embed1.zip\"")); assertFalse(content.contains("X-TIKA:content")); } Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java?rev=1688827&r1=1688826&r2=1688827&view=diff ============================================================================== --- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java (original) +++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java Thu Jul 2 13:47:23 2015 @@ -93,7 +93,7 @@ public class RecursiveParserWrapper impl private final Parser wrappedParser; private final ContentHandlerFactory contentHandlerFactory; - private final List<Metadata> 
metadatas = new LinkedList<Metadata>(); + private final List<Metadata> metadatas = new LinkedList<>(); private final boolean catchEmbeddedExceptions; @@ -150,8 +150,7 @@ public class RecursiveParserWrapper impl Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { - String name = getResourceName(metadata); - EmbeddedParserDecorator decorator = new EmbeddedParserDecorator(name); + EmbeddedParserDecorator decorator = new EmbeddedParserDecorator("/"); context.set(Parser.class, decorator); ContentHandler localHandler = contentHandlerFactory.getNewContentHandler(); long started = new Date().getTime(); @@ -313,7 +312,7 @@ public class RecursiveParserWrapper impl Parser preContextParser = context.get(Parser.class); context.set(Parser.class, new EmbeddedParserDecorator(objectLocation)); - + long started = new Date().getTime(); try { super.parse(stream, localHandler, metadata, context); } catch (SAXException e) { @@ -328,14 +327,7 @@ public class RecursiveParserWrapper impl throw e; } } - } catch (IOException e) { - if (catchEmbeddedExceptions) { - String trace = ExceptionUtils.getStackTrace(e); - metadata.set(EMBEDDED_EXCEPTION, trace); - } else { - throw e; - } - } catch (TikaException e) { + } catch (IOException|TikaException e) { if (catchEmbeddedExceptions) { String trace = ExceptionUtils.getStackTrace(e); metadata.set(EMBEDDED_EXCEPTION, trace); @@ -344,6 +336,8 @@ public class RecursiveParserWrapper impl } } finally { context.set(Parser.class, preContextParser); + long elapsedMillis = new Date().getTime() - started; + metadata.set(PARSE_TIME_MILLIS, Long.toString(elapsedMillis)); } //Because of recursion, we need Modified: tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java?rev=1688827&r1=1688826&r2=1688827&view=diff 
============================================================================== --- tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java (original) +++ tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java Thu Jul 2 13:47:23 2015 @@ -76,7 +76,7 @@ public class TestParsingExample { assertEquals("Number of embedded documents + 1 for the container document", 12, metadataList.size()); Metadata m = metadataList.get(6); //this is the location the embed3.txt text file within the outer .docx - assertEquals("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip/embed3.txt", + assertEquals("/embed1.zip/embed2.zip/embed3.zip/embed3.txt", m.get("X-TIKA:embedded_resource_path")); //it contains some html encoded content assertContains("When in the Course", m.get("X-TIKA:content")); Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java?rev=1688827&r1=1688826&r2=1688827&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java (original) +++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java Thu Jul 2 13:47:23 2015 @@ -166,17 +166,17 @@ public class RecursiveParserWrapperTest public void testEmbeddedResourcePath() throws Exception { Set<String> targets = new HashSet<String>(); - targets.add("test_recursive_embedded.docx/embed1.zip"); - targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip"); - targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip"); - targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip/embed4.zip"); - targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip/embed4.zip/embed4.txt"); - 
targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip/embed3.txt"); - targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed2a.txt"); - targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed2b.txt"); - targets.add("test_recursive_embedded.docx/embed1.zip/embed1b.txt"); - targets.add("test_recursive_embedded.docx/embed1.zip/embed1a.txt"); - targets.add("test_recursive_embedded.docx/image1.emf"); + targets.add("/embed1.zip"); + targets.add("/embed1.zip/embed2.zip"); + targets.add("/embed1.zip/embed2.zip/embed3.zip"); + targets.add("/embed1.zip/embed2.zip/embed3.zip/embed4.zip"); + targets.add("/embed1.zip/embed2.zip/embed3.zip/embed4.zip/embed4.txt"); + targets.add("/embed1.zip/embed2.zip/embed3.zip/embed3.txt"); + targets.add("/embed1.zip/embed2.zip/embed2a.txt"); + targets.add("/embed1.zip/embed2.zip/embed2b.txt"); + targets.add("/embed1.zip/embed1b.txt"); + targets.add("/embed1.zip/embed1a.txt"); + targets.add("/image1.emf"); Metadata metadata = new Metadata(); metadata.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded.docx"); Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java?rev=1688827&r1=1688826&r2=1688827&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java (original) +++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java Thu Jul 2 13:47:23 2015 @@ -168,7 +168,7 @@ public class SQLite3ParserTest extends T assertContains("The quick brown fox", metadataList.get(4).get(RecursiveParserWrapper.TIKA_CONTENT)); //confirm .doc was added to blob - assertEquals("testSqlite3b.db/BYTES_COL_0.doc/image1.png", metadataList.get(1).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_PATH)); + 
assertEquals("/BYTES_COL_0.doc/image1.png", metadataList.get(1).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_PATH)); } @Test Modified: tika/trunk/tika-server/pom.xml URL: http://svn.apache.org/viewvc/tika/trunk/tika-server/pom.xml?rev=1688827&r1=1688826&r2=1688827&view=diff ============================================================================== --- tika/trunk/tika-server/pom.xml (original) +++ tika/trunk/tika-server/pom.xml Thu Jul 2 13:47:23 2015 @@ -255,25 +255,6 @@ </execution> </executions> </plugin> - <plugin> - <groupId>com.qmino</groupId> - <artifactId>miredot-maven-plugin</artifactId> - <version>1.4</version> - <executions> - <execution> - <goals> - <goal>restdoc</goal> - </goals> - </execution> - </executions> - <configuration> - <licence> - <!-- Miredot license key valid until August 1st, 2016 when we can apply for a new one - http://s.apache.org/oE --> - UHJvamVjdHxvcmcuYXBhY2hlLnRpa2EudGlrYS1zZXJ2ZXJ8MjAxNi0wOC0wMXx0cnVlI01Dd0NGRklXRzRqRmNTZXNJb2laRElKZVF4RXpieUNTQWhSMHBmTzZCMUdMbDBPQ1B1WmJYQ3NpZElZSCtRPT0= - </licence> - <!-- insert other configuration here (optional) --> - </configuration> - </plugin> </plugins> </build> <profiles>