Author: tallison
Date: Thu Jul  2 13:47:23 2015
New Revision: 1688827

URL: http://svn.apache.org/r1688827
Log:
TIKA-1673 drop source file name from embedded file path; made a few Java 7
updates; added timing for embedded docs

Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
    tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
    tika/trunk/tika-server/pom.xml

Modified: tika/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Thu Jul  2 13:47:23 2015
@@ -1,4 +1,7 @@
 Release 1.10 - Current Development
+  * Drop the source file name from the embedded file path in
+    RecursiveParserWrapper's "X-TIKA:embedded_resource_path" 
+    (TIKA-1673).
 
   * Upgraded to Java 7 (TIKA-1536).
 

Modified: tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java 
(original)
+++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java Thu 
Jul  2 13:47:23 2015
@@ -175,7 +175,7 @@ public class TikaCLITest {
         int title = json.indexOf("\"title\"");
         assertTrue(enc > -1 && fb > -1 && enc < fb);
         assertTrue (fb > -1 && title > -1 && fb < title);
-        
assertTrue(json.contains("\"X-TIKA:digest:MD2\":\"470481522c33aa7f6558dfc5cc0c8135\""));
+        assertTrue(json.contains("\"X-TIKA:digest:MD2\":"));
     }
 
     /**
@@ -375,7 +375,7 @@ public class TikaCLITest {
                 "    \"Application-Version\": \"15.0000\",\n" +
                 "    \"Character Count\": \"28\",\n" +
                 "    \"Character-Count-With-Spaces\": \"31\","));
-        assertTrue(content.contains("\"X-TIKA:embedded_resource_path\": 
\"test_recursive_embedded.docx/embed1.zip\""));
+        assertTrue(content.contains("\"X-TIKA:embedded_resource_path\": 
\"/embed1.zip\""));
         assertFalse(content.contains("X-TIKA:content"));
 
     }

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
 (original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
 Thu Jul  2 13:47:23 2015
@@ -93,7 +93,7 @@ public class RecursiveParserWrapper impl
  
     private final Parser wrappedParser;
     private final ContentHandlerFactory contentHandlerFactory;
-    private final List<Metadata> metadatas = new LinkedList<Metadata>();
+    private final List<Metadata> metadatas = new LinkedList<>();
 
     private final boolean catchEmbeddedExceptions;
 
@@ -150,8 +150,7 @@ public class RecursiveParserWrapper impl
             Metadata metadata, ParseContext context) throws IOException,
             SAXException, TikaException {
 
-        String name = getResourceName(metadata);
-        EmbeddedParserDecorator decorator = new EmbeddedParserDecorator(name);
+        EmbeddedParserDecorator decorator = new EmbeddedParserDecorator("/");
         context.set(Parser.class, decorator);
         ContentHandler localHandler = 
contentHandlerFactory.getNewContentHandler();
         long started = new Date().getTime();
@@ -313,7 +312,7 @@ public class RecursiveParserWrapper impl
             
             Parser preContextParser = context.get(Parser.class);
             context.set(Parser.class, new 
EmbeddedParserDecorator(objectLocation));
-
+            long started = new Date().getTime();
             try {
                 super.parse(stream, localHandler, metadata, context);
             } catch (SAXException e) {
@@ -328,14 +327,7 @@ public class RecursiveParserWrapper impl
                         throw e;
                     }
                 }
-            } catch (IOException e) {
-                if (catchEmbeddedExceptions) {
-                    String trace = ExceptionUtils.getStackTrace(e);
-                    metadata.set(EMBEDDED_EXCEPTION, trace);
-                } else {
-                    throw e;
-                }
-            } catch (TikaException e) {
+            } catch (IOException|TikaException e) {
                 if (catchEmbeddedExceptions) {
                     String trace = ExceptionUtils.getStackTrace(e);
                     metadata.set(EMBEDDED_EXCEPTION, trace);
@@ -344,6 +336,8 @@ public class RecursiveParserWrapper impl
                 }
             } finally {
                 context.set(Parser.class, preContextParser);
+                long elapsedMillis = new Date().getTime() - started;
+                metadata.set(PARSE_TIME_MILLIS, Long.toString(elapsedMillis));
             }
             
             //Because of recursion, we need

Modified: 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
 (original)
+++ 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
 Thu Jul  2 13:47:23 2015
@@ -76,7 +76,7 @@ public class TestParsingExample {
         assertEquals("Number of embedded documents + 1 for the container 
document", 12, metadataList.size());
         Metadata m = metadataList.get(6);
         //this is the location the embed3.txt text file within the outer .docx
-        
assertEquals("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip/embed3.txt",
+        assertEquals("/embed1.zip/embed2.zip/embed3.zip/embed3.txt",
                 m.get("X-TIKA:embedded_resource_path"));
         //it contains some html encoded content
         assertContains("When in the Course", m.get("X-TIKA:content"));

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
 (original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
 Thu Jul  2 13:47:23 2015
@@ -166,17 +166,17 @@ public class RecursiveParserWrapperTest
     public void testEmbeddedResourcePath() throws Exception {
 
         Set<String> targets = new HashSet<String>();
-        targets.add("test_recursive_embedded.docx/embed1.zip");
-        targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip");
-        
targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip");
-        
targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip/embed4.zip");
-        
targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip/embed4.zip/embed4.txt");
-        
targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed3.zip/embed3.txt");
-        
targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed2a.txt");
-        
targets.add("test_recursive_embedded.docx/embed1.zip/embed2.zip/embed2b.txt");
-        targets.add("test_recursive_embedded.docx/embed1.zip/embed1b.txt");
-        targets.add("test_recursive_embedded.docx/embed1.zip/embed1a.txt");
-        targets.add("test_recursive_embedded.docx/image1.emf");
+        targets.add("/embed1.zip");
+        targets.add("/embed1.zip/embed2.zip");
+        targets.add("/embed1.zip/embed2.zip/embed3.zip");
+        targets.add("/embed1.zip/embed2.zip/embed3.zip/embed4.zip");
+        targets.add("/embed1.zip/embed2.zip/embed3.zip/embed4.zip/embed4.txt");
+        targets.add("/embed1.zip/embed2.zip/embed3.zip/embed3.txt");
+        targets.add("/embed1.zip/embed2.zip/embed2a.txt");
+        targets.add("/embed1.zip/embed2.zip/embed2b.txt");
+        targets.add("/embed1.zip/embed1b.txt");
+        targets.add("/embed1.zip/embed1a.txt");
+        targets.add("/image1.emf");
 
         Metadata metadata = new Metadata();
         metadata.set(Metadata.RESOURCE_NAME_KEY, 
"test_recursive_embedded.docx");

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
 (original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
 Thu Jul  2 13:47:23 2015
@@ -168,7 +168,7 @@ public class SQLite3ParserTest extends T
         assertContains("The quick brown fox", 
metadataList.get(4).get(RecursiveParserWrapper.TIKA_CONTENT));
 
         //confirm .doc was added to blob
-        assertEquals("testSqlite3b.db/BYTES_COL_0.doc/image1.png", 
metadataList.get(1).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_PATH));
+        assertEquals("/BYTES_COL_0.doc/image1.png", 
metadataList.get(1).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_PATH));
     }
 
     @Test

Modified: tika/trunk/tika-server/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-server/pom.xml?rev=1688827&r1=1688826&r2=1688827&view=diff
==============================================================================
--- tika/trunk/tika-server/pom.xml (original)
+++ tika/trunk/tika-server/pom.xml Thu Jul  2 13:47:23 2015
@@ -255,25 +255,6 @@
           </execution>
         </executions>
       </plugin>
-      <plugin>
-        <groupId>com.qmino</groupId>
-        <artifactId>miredot-maven-plugin</artifactId>
-        <version>1.4</version>
-        <executions>
-          <execution>
-            <goals>
-              <goal>restdoc</goal>
-            </goals>
-          </execution>
-        </executions>
-        <configuration>
-          <licence>
-            <!-- Miredot license key valid until August 1st, 2016 when we can 
apply for a new one - http://s.apache.org/oE -->
-            
UHJvamVjdHxvcmcuYXBhY2hlLnRpa2EudGlrYS1zZXJ2ZXJ8MjAxNi0wOC0wMXx0cnVlI01Dd0NGRklXRzRqRmNTZXNJb2laRElKZVF4RXpieUNTQWhSMHBmTzZCMUdMbDBPQ1B1WmJYQ3NpZElZSCtRPT0=
-          </licence>
-          <!-- insert other configuration here (optional) -->
-        </configuration>
-      </plugin>
     </plugins>
   </build>
   <profiles>


Reply via email to