Author: chetanm
Date: Tue Jul 14 09:57:03 2015
New Revision: 1690891

URL: http://svn.apache.org/r1690891
Log:
OAK-2953 - Implement text extractor as part of oak-run

Merging 1690249,1690636,1690669

Added:
    
    jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/
      - copied from r1690249, jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/
    jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/CSVFileGenerator.java
      - copied, changed from r1690636, jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/CSVFileGenerator.java
    jackrabbit/oak/branches/1.2/oak-run/src/test/java/org/apache/jackrabbit/oak/plugins/
      - copied from r1690249, jackrabbit/oak/trunk/oak-run/src/test/java/org/apache/jackrabbit/oak/plugins/
Modified:
    jackrabbit/oak/branches/1.2/   (props changed)
    jackrabbit/oak/branches/1.2/oak-run/pom.xml
    jackrabbit/oak/branches/1.2/oak-run/src/main/assembly/oak-run-jr2.xml
    jackrabbit/oak/branches/1.2/oak-run/src/main/assembly/oak-run.xml
    
    jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/NodeStoreBinaryResourceProvider.java
    jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java
    jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java
    jackrabbit/oak/branches/1.2/oak-run/src/main/resources/logback.xml
    jackrabbit/oak/branches/1.2/oak-run/src/test/java/org/apache/jackrabbit/oak/plugins/tika/NodeStoreBinaryResourceProviderTest.java

Propchange: jackrabbit/oak/branches/1.2/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Jul 14 09:57:03 2015
@@ -1,3 +1,3 @@
 /jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk:1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673414-1673415,1673436,1673644,1673662-1673664,1673669,1673695,1673738,1673787,1673791,1674046,1674065,1674075,1674107,1674228,1674780,1674880,1675054-1675055,1675319,1675332,1675354,1675357,1675382,1675555,1675566,1675593,1676198,1676237,1676407,1676458,1676539,1676670,1676693,1676703,1676725,1677579,1677581,1677609,1677611,1677774,1677788,1677797,1677804,1677806,1677939,1677991,1678023,1678095-1678096,1678171,1678173,1678211,1678323,1678758,1678938,1678954,1679144,1679165,1679191,1679232,1679235,1679503,1679958,1679961,1680170,1680182,1680222,1680232,1680236,1680461,1680633,1680643,1680747,1680805-1680806,1680903,1681282,1681767,1681918,1682042,1682218,1682235,1682437,1682494,1682555,1682855,1682904,1683059,1683089,1683213,1683249,1683259,1683278,1683323,1683687,1683700,1684174-1684175,1684186,1684376,1684442,1684561,1684570,1684601,1684618,1684820,1684868,1685023,1685370,1685552
 
,1685589-1685590,1685840,1685964,1685977,1685989,1685999,1686023,1686032,1686097,1686162,1686229,1686234,1686253,1686414,1686780,1686854,1686857,1686971,1687053-1687055,1687175,1687196,1687198,1687220,1687239-1687240,1687301,1687441,1687553,1688089-1688090,1688172,1688179,1688349,1688421,1688436,1688453,1688616,1688622,1688636,1688817,1689003-1689004,1689008,1689577,1689581,1689623,1689810,1689828,1689833,1689903,1690017,1690043,1690047,1690057,1690247,1690634-1690635,1690674
+/jackrabbit/oak/trunk:1672350,1672468,1672537,1672603,1672642,1672644,1672834-1672835,1673351,1673410,1673414-1673415,1673436,1673644,1673662-1673664,1673669,1673695,1673738,1673787,1673791,1674046,1674065,1674075,1674107,1674228,1674780,1674880,1675054-1675055,1675319,1675332,1675354,1675357,1675382,1675555,1675566,1675593,1676198,1676237,1676407,1676458,1676539,1676670,1676693,1676703,1676725,1677579,1677581,1677609,1677611,1677774,1677788,1677797,1677804,1677806,1677939,1677991,1678023,1678095-1678096,1678171,1678173,1678211,1678323,1678758,1678938,1678954,1679144,1679165,1679191,1679232,1679235,1679503,1679958,1679961,1680170,1680182,1680222,1680232,1680236,1680461,1680633,1680643,1680747,1680805-1680806,1680903,1681282,1681767,1681918,1682042,1682218,1682235,1682437,1682494,1682555,1682855,1682904,1683059,1683089,1683213,1683249,1683259,1683278,1683323,1683687,1683700,1684174-1684175,1684186,1684376,1684442,1684561,1684570,1684601,1684618,1684820,1684868,1685023,1685370,1685552
 
,1685589-1685590,1685840,1685964,1685977,1685989,1685999,1686023,1686032,1686097,1686162,1686229,1686234,1686253,1686414,1686780,1686854,1686857,1686971,1687053-1687055,1687175,1687196,1687198,1687220,1687239-1687240,1687301,1687441,1687553,1688089-1688090,1688172,1688179,1688349,1688421,1688436,1688453,1688616,1688622,1688636,1688817,1689003-1689004,1689008,1689577,1689581,1689623,1689810,1689828,1689833,1689903,1690017,1690043,1690047,1690057,1690247,1690249,1690634-1690636,1690669,1690674
 /jackrabbit/trunk:1345480

Modified: jackrabbit/oak/branches/1.2/oak-run/pom.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-run/pom.xml?rev=1690891&r1=1690890&r2=1690891&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-run/pom.xml (original)
+++ jackrabbit/oak/branches/1.2/oak-run/pom.xml Tue Jul 14 09:57:03 2015
@@ -357,6 +357,22 @@
       <scope>compile</scope>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-parsers</artifactId>
+      <version>1.5</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>1.5</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-csv</artifactId>
+      <version>1.1</version>
+    </dependency>
+
     <!-- Findbugs annotations -->
     <dependency>
       <groupId>com.google.code.findbugs</groupId>

Modified: jackrabbit/oak/branches/1.2/oak-run/src/main/assembly/oak-run-jr2.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-run/src/main/assembly/oak-run-jr2.xml?rev=1690891&r1=1690890&r2=1690891&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-run/src/main/assembly/oak-run-jr2.xml (original)
+++ jackrabbit/oak/branches/1.2/oak-run/src/main/assembly/oak-run-jr2.xml Tue Jul 14 09:57:03 2015
@@ -38,11 +38,13 @@
       <excludes>
         <exclude>org.apache.jackrabbit:oak-lucene</exclude>
         <exclude>org.apache.lucene</exclude>
+        <exclude>org.apache.tika</exclude>
       </excludes>
       <useStrictFiltering>true</useStrictFiltering>
       <useProjectArtifact>true</useProjectArtifact>
       <unpack>true</unpack>
       <useTransitiveDependencies>true</useTransitiveDependencies>
+      <useTransitiveFiltering>true</useTransitiveFiltering>
       <unpackOptions>
         <excludes>
           <exclude>META-INF/*.SF</exclude>

Modified: jackrabbit/oak/branches/1.2/oak-run/src/main/assembly/oak-run.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-run/src/main/assembly/oak-run.xml?rev=1690891&r1=1690890&r2=1690891&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-run/src/main/assembly/oak-run.xml (original)
+++ jackrabbit/oak/branches/1.2/oak-run/src/main/assembly/oak-run.xml Tue Jul 14 09:57:03 2015
@@ -33,11 +33,14 @@
         <exclude>org.apache.jackrabbit:jackrabbit-core</exclude>
         <exclude>org.apache.lucene</exclude>
         <exclude>org.apache.derby</exclude>
+        <exclude>org.apache.tika:tika-core:*</exclude>
+        <exclude>org.apache.tika:tika-parsers:*</exclude>
       </excludes>
       <useStrictFiltering>true</useStrictFiltering>
       <useProjectArtifact>true</useProjectArtifact>
       <unpack>true</unpack>
       <useTransitiveDependencies>true</useTransitiveDependencies>
+      <useTransitiveFiltering>true</useTransitiveFiltering>
       <unpackOptions>
         <excludes>
           <exclude>META-INF/*.SF</exclude>
@@ -51,5 +54,24 @@
         </excludes>
       </unpackOptions>
     </dependencySet>
+    <!-- Exclude the transitive dependency as tika-parsers depend
+      on many other jars. Instead users can include tika-app.jar in classpath-->
+    <dependencySet>
+      <outputDirectory>/</outputDirectory>
+      <includes>
+        <include>org.apache.tika:tika-core</include>
+        <include>org.apache.tika:tika-parsers</include>
+      </includes>
+      <useStrictFiltering>true</useStrictFiltering>
+      <useTransitiveDependencies>false</useTransitiveDependencies>
+      <unpack>true</unpack>
+      <unpackOptions>
+        <excludes>
+          <exclude>META-INF/*.SF</exclude>
+          <exclude>META-INF/*.DSA</exclude>
+          <exclude>META-INF/*.RSA</exclude>
+        </excludes>
+      </unpackOptions>
+    </dependencySet>
   </dependencySets>
 </assembly>

Copied: 
jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/CSVFileGenerator.java
 (from r1690636, 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/CSVFileGenerator.java)
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/CSVFileGenerator.java?p2=jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/CSVFileGenerator.java&p1=jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/CSVFileGenerator.java&r1=1690636&r2=1690891&rev=1690891&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/CSVFileGenerator.java
 (original)
+++ 
jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/CSVFileGenerator.java
 Tue Jul 14 09:57:03 2015
@@ -32,15 +32,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public class CSVFileGenerator {
-    /*
-        Instead of using the FORMAT from CSVFileBinaryResourceProvider
-        defining our own without header. Otherwise commons-csv was always
-        adding the header
-     */
-    private static final CSVFormat FORMAT = CSVFormat.DEFAULT
-            .withCommentMarker('#')
-            .withNullString("") //Empty string are considered as null
-            .withIgnoreSurroundingSpaces();
     private final Logger log = LoggerFactory.getLogger(getClass());
     private File outFile;
 
@@ -52,7 +43,8 @@ public class CSVFileGenerator {
         Closer closer = Closer.create();
         int count = 0;
         try{
-            CSVPrinter printer = new CSVPrinter(Files.newWriter(outFile, Charsets.UTF_8), FORMAT);
+            CSVPrinter printer = new CSVPrinter(Files.newWriter(outFile, Charsets.UTF_8),
+                    CSVFileBinaryResourceProvider.FORMAT);
             for (BinaryResource br : binaries){
                 count++;
                 printer.printRecord(

Modified: 
jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/NodeStoreBinaryResourceProvider.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/NodeStoreBinaryResourceProvider.java?rev=1690891&r1=1690249&r2=1690891&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/NodeStoreBinaryResourceProvider.java
 (original)
+++ 
jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/NodeStoreBinaryResourceProvider.java
 Tue Jul 14 09:57:03 2015
@@ -72,7 +72,9 @@ class NodeStoreBinaryResourceProvider im
 
             Blob blob = data.getValue(Type.BINARY);
             String blobId = blob.getContentIdentity();
-            if (blobId == null) {
+            //Check for ref being non null to ensure its not an inlined binary
+            //For Segment ContentIdentity defaults to RecordId
+            if (blob.getReference() == null || blobId == null) {
                 log.debug("Ignoring jcr:data property at {} as its an inlined blob", tree.getPath());
                 return null;
             }

Modified: 
jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java?rev=1690891&r1=1690249&r2=1690891&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java
 (original)
+++ 
jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractorMain.java
 Tue Jul 14 09:57:03 2015
@@ -21,16 +21,26 @@ package org.apache.jackrabbit.oak.plugin
 
 import java.io.Closeable;
 import java.io.File;
+import java.io.IOException;
 import java.util.List;
 
 import com.google.common.io.Closer;
+import com.mongodb.MongoClientURI;
+import com.mongodb.MongoURI;
 import joptsimple.OptionParser;
 import joptsimple.OptionSet;
 import joptsimple.OptionSpec;
 import org.apache.jackrabbit.core.data.FileDataStore;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreTextWriter;
+import org.apache.jackrabbit.oak.plugins.document.DocumentMK;
+import org.apache.jackrabbit.oak.plugins.document.DocumentNodeStore;
+import org.apache.jackrabbit.oak.plugins.document.util.MongoConnection;
+import org.apache.jackrabbit.oak.plugins.segment.SegmentNodeStore;
+import org.apache.jackrabbit.oak.plugins.segment.file.FileStore;
+import org.apache.jackrabbit.oak.run.Main;
 import org.apache.jackrabbit.oak.spi.blob.BlobStore;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -89,8 +99,6 @@ public class TextExtractorMain {
                     .withRequiredArg()
                     .ofType(Integer.class);
 
-            //TODO implement generate support
-
             OptionSpec<String> nonOption = parser.nonOptions(h);
 
             OptionSet options = parser.parse(args);
@@ -108,7 +116,8 @@ public class TextExtractorMain {
 
             boolean report = nonOptions.contains("report");
             boolean extract = nonOptions.contains("extract");
-            File dataFile;
+            boolean generate = nonOptions.contains("generate");
+            File dataFile = null;
             File fdsDir;
             File storeDir = null;
             File tikaConfigFile = null;
@@ -142,23 +151,35 @@ public class TextExtractorMain {
 
             if (options.has(dataFileSpec)) {
                 dataFile = dataFileSpec.value(options);
-                checkArgument(dataFile.exists(), "Data file %s does not 
exist", dataFile.getAbsolutePath());
-                binaryResourceProvider = new 
CSVFileBinaryResourceProvider(dataFile, blobStore);
             }
 
-            if (binaryResourceProvider instanceof Closeable) {
-                closer.register((Closeable) binaryResourceProvider);
-            }
+            checkNotNull(dataFile, "Data file not configured with %s", 
dataFileSpec);
 
             if (report || extract) {
-                checkNotNull(binaryResourceProvider, "BinaryProvider source 
must be specified either " +
-                        "via '%s' or '%s", dataFileSpec.options(), 
nodeStoreSpec.options());
+                checkArgument(dataFile.exists(),
+                        "Data file %s does not exist", 
dataFile.getAbsolutePath());
+
+                binaryResourceProvider = new 
CSVFileBinaryResourceProvider(dataFile, blobStore);
+                if (binaryResourceProvider instanceof Closeable) {
+                    closer.register((Closeable) binaryResourceProvider);
+                }
 
                 stats = new BinaryStats(tikaConfigFile, 
binaryResourceProvider);
                 String summary = stats.getSummary();
                 log.info(summary);
             }
 
+            if (generate){
+                String src = nodeStoreSpec.value(options);
+                checkNotNull(blobStore, "BlobStore found to be null. 
FileDataStore directory " +
+                        "must be specified via %s", fdsDirSpec.options());
+                checkNotNull(dataFile, "Data file path not provided");
+                NodeStore nodeStore = bootStrapNodeStore(src, blobStore, 
closer);
+                BinaryResourceProvider brp = new 
NodeStoreBinaryResourceProvider(nodeStore, blobStore);
+                CSVFileGenerator generator = new CSVFileGenerator(dataFile);
+                generator.generate(brp.getBinaries(path));
+            }
+
             if (extract) {
                 checkNotNull(storeDir, "Directory to store extracted text 
content " +
                         "must be specified via %s", storeDirSpec.options());
@@ -197,4 +218,56 @@ public class TextExtractorMain {
             closer.close();
         }
     }
+
+    private static NodeStore bootStrapNodeStore(String src, BlobStore blobStore,
+                                                Closer closer) throws IOException {
+        if (src.startsWith(MongoURI.MONGODB_PREFIX)) {
+            MongoClientURI uri = new MongoClientURI(src);
+            if (uri.getDatabase() == null) {
+                System.err.println("Database missing in MongoDB URI: "
+                        + uri.getURI());
+                System.exit(1);
+            }
+            MongoConnection mongo = new MongoConnection(uri.getURI());
+            closer.register(asCloseable(mongo));
+            DocumentNodeStore store = new DocumentMK.Builder()
+                    .setBlobStore(blobStore)
+                    .setMongoDB(mongo.getDB()).getNodeStore();
+            closer.register(asCloseable(store));
+            return store;
+        }
+        FileStore fs = FileStore.newFileStore(new File(src))
+                .withBlobStore(blobStore)
+                .withMemoryMapping(Main.TAR_STORAGE_MEMORY_MAPPED)
+                .create();
+        closer.register(asCloseable(fs));
+        return new SegmentNodeStore(fs);
+    }
+
+    private static Closeable asCloseable(final FileStore fs) {
+        return new Closeable() {
+            @Override
+            public void close() throws IOException {
+                fs.close();
+            }
+        };
+    }
+
+    private static Closeable asCloseable(final DocumentNodeStore dns) {
+        return new Closeable() {
+            @Override
+            public void close() throws IOException {
+                dns.dispose();
+            }
+        };
+    }
+
+    private static Closeable asCloseable(final MongoConnection con) {
+        return new Closeable() {
+            @Override
+            public void close() throws IOException {
+                con.close();
+            }
+        };
+    }
 }

Modified: 
jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java?rev=1690891&r1=1690890&r2=1690891&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java
 (original)
+++ 
jackrabbit/oak/branches/1.2/oak-run/src/main/java/org/apache/jackrabbit/oak/run/Main.java
 Tue Jul 14 09:57:03 2015
@@ -99,6 +99,7 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.plugins.segment.file.FileStore;
 import org.apache.jackrabbit.oak.plugins.segment.standby.client.StandbyClient;
 import org.apache.jackrabbit.oak.plugins.segment.standby.server.StandbyServer;
+import org.apache.jackrabbit.oak.plugins.tika.TextExtractorMain;
 import org.apache.jackrabbit.oak.scalability.ScalabilityRunner;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
@@ -187,6 +188,9 @@ public class Main {
             case REPAIR:
                 repair(args);
                 break;
+            case TIKA:
+                TextExtractorMain.main(args);
+                break;
             case HELP:
             default:
                 System.err.print("Available run modes: ");
@@ -1175,7 +1179,8 @@ public class Main {
         HELP("help"),
         CHECKPOINTS("checkpoints"),
         RECOVERY("recovery"),
-        REPAIR("repair");
+        REPAIR("repair"),
+        TIKA("tika");
 
         private final String name;
 

Modified: jackrabbit/oak/branches/1.2/oak-run/src/main/resources/logback.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-run/src/main/resources/logback.xml?rev=1690891&r1=1690890&r2=1690891&view=diff
==============================================================================
--- jackrabbit/oak/branches/1.2/oak-run/src/main/resources/logback.xml 
(original)
+++ jackrabbit/oak/branches/1.2/oak-run/src/main/resources/logback.xml Tue Jul 
14 09:57:03 2015
@@ -36,6 +36,8 @@
   <!-- Display info messages from the scalability suite -->
   <logger name="org.apache.jackrabbit.oak.scalability" level="INFO"/>
 
+  <logger name="org.apache.jackrabbit.oak.plugins.tika" level="INFO"/>
+
   <logger name="org.apache.jackrabbit.oak.plugins.segment.file.tooling.ConsistencyChecker" level="DEBUG"/>
 
   <root level="warn">

Modified: 
jackrabbit/oak/branches/1.2/oak-run/src/test/java/org/apache/jackrabbit/oak/plugins/tika/NodeStoreBinaryResourceProviderTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.2/oak-run/src/test/java/org/apache/jackrabbit/oak/plugins/tika/NodeStoreBinaryResourceProviderTest.java?rev=1690891&r1=1690249&r2=1690891&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.2/oak-run/src/test/java/org/apache/jackrabbit/oak/plugins/tika/NodeStoreBinaryResourceProviderTest.java
 (original)
+++ 
jackrabbit/oak/branches/1.2/oak-run/src/test/java/org/apache/jackrabbit/oak/plugins/tika/NodeStoreBinaryResourceProviderTest.java
 Tue Jul 14 09:57:03 2015
@@ -19,8 +19,13 @@
 
 package org.apache.jackrabbit.oak.plugins.tika;
 
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+
 import org.apache.jackrabbit.JcrConstants;
 import org.apache.jackrabbit.oak.api.Blob;
+import org.apache.jackrabbit.oak.plugins.blob.BlobStoreBlob;
 import org.apache.jackrabbit.oak.plugins.memory.ArrayBasedBlob;
 import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
 import org.apache.jackrabbit.oak.spi.blob.BlobStore;
@@ -28,13 +33,18 @@ import org.apache.jackrabbit.oak.spi.blo
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
 
 import static org.apache.jackrabbit.JcrConstants.JCR_CONTENT;
 import static 
org.apache.jackrabbit.oak.plugins.nodetype.write.InitialContent.INITIAL_CONTENT;
 import static org.junit.Assert.assertEquals;
 
 public class NodeStoreBinaryResourceProviderTest {
+    @Rule
+    public final TemporaryFolder temporaryFolder = new TemporaryFolder();
+
     private NodeState root = INITIAL_CONTENT;
 
     @Test
@@ -57,6 +67,24 @@ public class NodeStoreBinaryResourceProv
         assertEquals("text/foo", bs.getMimeType());
         assertEquals("bar", bs.getEncoding());
         assertEquals("id2", bs.getBlobId());
+    }
+
+    @Test
+    public void csvGenerator() throws Exception {
+        File csv = new File(temporaryFolder.getRoot(), "test.csv");
+        BlobStore blobStore = new MemoryBlobStore();
+        NodeBuilder builder = root.builder();
+        createFileNode(builder, "a", blobOf("foo", blobStore), "text/plain");
+        createFileNode(builder, "b", blobOf("hello", blobStore), "text/plain");
+
+        NodeStore store = new MemoryNodeStore(builder.getNodeState());
+
+        NodeStoreBinaryResourceProvider extractor = new 
NodeStoreBinaryResourceProvider(store, blobStore);
+        CSVFileGenerator generator = new CSVFileGenerator(csv);
+        generator.generate(extractor.getBinaries("/"));
+
+        CSVFileBinaryResourceProvider csvbrp = new 
CSVFileBinaryResourceProvider(csv, blobStore);
+        assertEquals(2, csvbrp.getBinaries("/").size());
 
     }
 
@@ -67,6 +95,11 @@ public class NodeStoreBinaryResourceProv
         return jcrContent;
     }
 
+    private Blob blobOf(String content, BlobStore bs) throws IOException {
+        String id = bs.writeBlob(new ByteArrayInputStream(content.getBytes()));
+        return new BlobStoreBlob(bs, id);
+    }
+
     private static class IdBlob extends ArrayBasedBlob {
         final String id;
 
@@ -76,6 +109,11 @@ public class NodeStoreBinaryResourceProv
         }
 
         @Override
+        public String getReference() {
+            return id;
+        }
+
+        @Override
         public String getContentIdentity() {
             return id;
         }


Reply via email to