thomasmueller commented on code in PR #1123:
URL: https://github.com/apache/jackrabbit-oak/pull/1123#discussion_r1328465143


##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java:
##########
@@ -118,14 +122,53 @@ public static String getPath(String entryLine) {
         return entryLine.substring(0, getDelimiterPosition(entryLine));
     }
 
+    /*
+        Currently we only have 2 types of store:
+        1. FlatFileStore
+        2. IncrementalFlatFileStore
+     */
     public static String[] getParts(String line) {
-        int pos = getDelimiterPosition(line);
-        return new String[]{line.substring(0, pos), line.substring(pos + 1)};
+        // file is FlatFileStore
+        if (line.endsWith("}")) {
+            int pos = getDelimiterPosition(line);
+            return new String[]{line.substring(0, pos), line.substring(pos + 
1)};
+        } else {
+            // file is IncrementalFlatFileStore
+            List<Integer> positions = getDelimiterPositions(line);
+            checkState(positions.size() >= 2, "Invalid path entry [%s]", line);
+            // there are 4 parts in incrementalFFS and default delimiter is |
+            // path|nodeData|checkpoint|operand
+            // Node's data can itself have many | so we split based on first 
and last 2 |
+            String[] parts = new String[4];
+            parts[0] = line.substring(0, positions.get(0));
+            parts[1] = line.substring(positions.get(0) + 1, 
positions.get(positions.size() - 2));
+            parts[2] = line.substring(positions.get(positions.size() - 2) + 1, 
positions.get(positions.size() - 1));
+            parts[3] = line.substring(positions.get(positions.size() - 1) + 1);

Review Comment:
   Same here — the last-delimiter lookups can be simplified using fixed indices:
   
       parts[3] = line.substring(positions.get(2) + 1);
   
   
   (I think)



##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java:
##########
@@ -244,73 +255,83 @@ public FlatFileStore build() throws IOException, 
CompositeException {
     }
 
     public List<FlatFileStore> buildList(IndexHelper indexHelper, 
IndexerSupport indexerSupport,
-            Set<IndexDefinition> indexDefinitions) throws IOException, 
CompositeException {
+                                         Set<IndexDefinition> 
indexDefinitions) throws IOException, CompositeException {
         logFlags();
         comparator = new PathElementComparator(preferredPathElements);
         entryWriter = new NodeStateEntryWriter(blobStore);
 
-        List<File> fileList = createdSortedStoreFiles();
-        
+        Pair<List<File>, List<File>> pair = createdSortedStoreFiles();
+        List<File> fileList = pair.getKey();
+        File metadataFile = pair.getValue() == null || pair.getValue().size() 
== 0 ? null : pair.getValue().get(0);
+
         long start = System.currentTimeMillis();
         // If not already split, split otherwise skip splitting
         if (!fileList.stream().allMatch(FlatFileSplitter.IS_SPLIT)) {
             NodeStore nodeStore = new 
MemoryNodeStore(indexerSupport.retrieveNodeStateForCheckpoint());
             FlatFileSplitter splitter = new FlatFileSplitter(fileList.get(0), 
indexHelper.getWorkDir(),
-                new NodeStateNodeTypeInfoProvider(nodeStore.getRoot()), new 
NodeStateEntryReader(blobStore),
-                indexDefinitions);
+                    new NodeStateNodeTypeInfoProvider(nodeStore.getRoot()), 
new NodeStateEntryReader(blobStore),
+                    indexDefinitions);
             fileList = splitter.split();
             log.info("Split flat file to result files '{}' is done, took {} 
ms", fileList, System.currentTimeMillis() - start);
         }
 
         List<FlatFileStore> storeList = new ArrayList<>();
         for (File flatFileItem : fileList) {
-            FlatFileStore store = new FlatFileStore(blobStore, flatFileItem, 
new NodeStateEntryReader(blobStore),
+            FlatFileStore store = new FlatFileStore(blobStore, flatFileItem, 
metadataFile, new NodeStateEntryReader(blobStore),
                     unmodifiableSet(preferredPathElements), algorithm);
             storeList.add(store);
         }
         return storeList;
     }
 
     /**
-     * Returns the existing list of store files if it can read from system 
property OAK_INDEXER_SORTED_FILE_PATH which 
-     * defines the existing folder where the flat file store files are 
present. Will throw an exception if it cannot 
+     * Returns the existing list of store files if it can read from system 
property OAK_INDEXER_SORTED_FILE_PATH which
+     * defines the existing folder where the flat file store files are 
present. Will throw an exception if it cannot
      * read or the path in the system property is not a directory.
-     * If the system property OAK_INDEXER_SORTED_FILE_PATH in undefined, or it 
cannot read relevant files it 
+     * If the system property OAK_INDEXER_SORTED_FILE_PATH is undefined, or it 
cannot read relevant files it
      * initializes the flat file store.
-     * 
-     * @return list of flat files
-     * @throws IOException 
+     *
+     * @return pair of "list of flat files" and metadata file
+     * @throws IOException
      * @throws CompositeException
      */
-    private List<File> createdSortedStoreFiles() throws IOException, 
CompositeException {
+    private Pair<List<File>, List<File>> createdSortedStoreFiles() throws 
IOException, CompositeException {
         // Check system property defined path
         String sortedFilePath = 
System.getProperty(OAK_INDEXER_SORTED_FILE_PATH);
         if (StringUtils.isNotBlank(sortedFilePath)) {
             File sortedDir = new File(sortedFilePath);
             log.info("Attempting to read from provided sorted files directory 
[{}] (via system property '{}')",
-                sortedDir.getAbsolutePath(), OAK_INDEXER_SORTED_FILE_PATH);
-            List<File> files = getFiles(sortedDir);
-            if (files != null) {
-                return files;
+                    sortedDir.getAbsolutePath(), OAK_INDEXER_SORTED_FILE_PATH);
+            // List of storefiles, List of metadatafile
+            Pair<List<File>, List<File>> storeFiles = 
getIndexStoreFiles(sortedDir);
+            if (storeFiles != null) {
+                return storeFiles;
             }
         }
 
         // Initialize the flat file store again
 
         createStoreDir();
-        SortStrategy strategy = createSortStrategy(flatFileStoreDir);
+        IndexStoreSortStrategy strategy = createSortStrategy(flatFileStoreDir);
         File result = strategy.createSortedStoreFile();
+        File metadata = strategy.createMetadataFile();
         entryCount = strategy.getEntryCount();
-        return Collections.singletonList(result);
+        return new ImmutablePair<>(Collections.singletonList(result), 
Collections.singletonList(metadata));
     }
 
     @Nullable
-    private List<File> getFiles(File sortedDir) {
+    private Pair<List<File>, List<File>> getIndexStoreFiles(File sortedDir) {

Review Comment:
   I'm also not quite sure... 
   
   If we have one metadata file per store file, then we could also use 
List<Pair<File, File>>.
   
   But a separate class might be even better.



##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java:
##########
@@ -118,14 +122,53 @@ public static String getPath(String entryLine) {
         return entryLine.substring(0, getDelimiterPosition(entryLine));
     }
 
+    /*
+        Currently we only have 2 types of store:
+        1. FlatFileStore
+        2. IncrementalFlatFileStore
+     */
     public static String[] getParts(String line) {
-        int pos = getDelimiterPosition(line);
-        return new String[]{line.substring(0, pos), line.substring(pos + 1)};
+        // file is FlatFileStore
+        if (line.endsWith("}")) {
+            int pos = getDelimiterPosition(line);
+            return new String[]{line.substring(0, pos), line.substring(pos + 
1)};
+        } else {
+            // file is IncrementalFlatFileStore
+            List<Integer> positions = getDelimiterPositions(line);
+            checkState(positions.size() >= 2, "Invalid path entry [%s]", line);
+            // there are 4 parts in incrementalFFS and default delimiter is |
+            // path|nodeData|checkpoint|operand
+            // Node's data can itself have many | so we split based on first 
and last 2 |
+            String[] parts = new String[4];
+            parts[0] = line.substring(0, positions.get(0));
+            parts[1] = line.substring(positions.get(0) + 1, 
positions.get(positions.size() - 2));
+            parts[2] = line.substring(positions.get(positions.size() - 2) + 1, 
positions.get(positions.size() - 1));
+            parts[3] = line.substring(positions.get(positions.size() - 1) + 1);
+            return parts;
+        }
     }
 
     private static int getDelimiterPosition(String entryLine) {
         int indexOfPipe = entryLine.indexOf(NodeStateEntryWriter.DELIMITER);
         checkState(indexOfPipe > 0, "Invalid path entry [%s]", entryLine);
         return indexOfPipe;
     }
+
+    private static List<Integer> getDelimiterPositions(String entryLine) {

Review Comment:
   I don't actually like the functional approach here. For me, the loop is 
easier to debug.



##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java:
##########
@@ -319,23 +340,26 @@ private List<File> getFiles(File sortedDir) {
         return null;

Review Comment:
   (For me, returning null is fine... My experience is that changing existing 
code is not worth the trouble.)



##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java:
##########
@@ -118,14 +122,53 @@ public static String getPath(String entryLine) {
         return entryLine.substring(0, getDelimiterPosition(entryLine));
     }
 
+    /*
+        Currently we only have 2 types of store:
+        1. FlatFileStore
+        2. IncrementalFlatFileStore
+     */
     public static String[] getParts(String line) {
-        int pos = getDelimiterPosition(line);
-        return new String[]{line.substring(0, pos), line.substring(pos + 1)};
+        // file is FlatFileStore
+        if (line.endsWith("}")) {
+            int pos = getDelimiterPosition(line);
+            return new String[]{line.substring(0, pos), line.substring(pos + 
1)};
+        } else {
+            // file is IncrementalFlatFileStore
+            List<Integer> positions = getDelimiterPositions(line);
+            checkState(positions.size() >= 2, "Invalid path entry [%s]", line);
+            // there are 4 parts in incrementalFFS and default delimiter is |
+            // path|nodeData|checkpoint|operand
+            // Node's data can itself have many | so we split based on first 
and last 2 |
+            String[] parts = new String[4];
+            parts[0] = line.substring(0, positions.get(0));
+            parts[1] = line.substring(positions.get(0) + 1, 
positions.get(positions.size() - 2));

Review Comment:
   Why not use the simpler
   
       line.substring(positions.get(0) + 1, positions.get(1));
   
   ? I think it should be equivalent.



##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java:
##########
@@ -118,14 +122,53 @@ public static String getPath(String entryLine) {
         return entryLine.substring(0, getDelimiterPosition(entryLine));
     }
 
+    /*
+        Currently we only have 2 types of store:
+        1. FlatFileStore
+        2. IncrementalFlatFileStore
+     */
     public static String[] getParts(String line) {
-        int pos = getDelimiterPosition(line);
-        return new String[]{line.substring(0, pos), line.substring(pos + 1)};
+        // file is FlatFileStore
+        if (line.endsWith("}")) {
+            int pos = getDelimiterPosition(line);
+            return new String[]{line.substring(0, pos), line.substring(pos + 
1)};
+        } else {
+            // file is IncrementalFlatFileStore
+            List<Integer> positions = getDelimiterPositions(line);
+            checkState(positions.size() >= 2, "Invalid path entry [%s]", line);
+            // there are 4 parts in incrementalFFS and default delimiter is |
+            // path|nodeData|checkpoint|operand
+            // Node's data can itself have many | so we split based on first 
and last 2 |
+            String[] parts = new String[4];
+            parts[0] = line.substring(0, positions.get(0));
+            parts[1] = line.substring(positions.get(0) + 1, 
positions.get(positions.size() - 2));
+            parts[2] = line.substring(positions.get(positions.size() - 2) + 1, 
positions.get(positions.size() - 1));
+            parts[3] = line.substring(positions.get(positions.size() - 1) + 1);
+            return parts;
+        }
     }
 
     private static int getDelimiterPosition(String entryLine) {
         int indexOfPipe = entryLine.indexOf(NodeStateEntryWriter.DELIMITER);
         checkState(indexOfPipe > 0, "Invalid path entry [%s]", entryLine);
         return indexOfPipe;
     }
+
+    private static List<Integer> getDelimiterPositions(String entryLine) {
+        List<Integer> indexPositions = new LinkedList<>();
+        int index = 0;
+        int indexOfPipe;
+        while (true) {
+            indexOfPipe = entryLine.indexOf(NodeStateEntryWriter.DELIMITER, 
index);
+            if (indexOfPipe > 0) {

Review Comment:
   Actually, it should be ">= 0"



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@jackrabbit.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to