thomasmueller commented on code in PR #1123: URL: https://github.com/apache/jackrabbit-oak/pull/1123#discussion_r1328465143
########## oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java: ########## @@ -118,14 +122,53 @@ public static String getPath(String entryLine) { return entryLine.substring(0, getDelimiterPosition(entryLine)); } + /* + Currently we only have 2 types of store: + 1. FlatFileStore + 2. IncrementalFlatFileStore + */ public static String[] getParts(String line) { - int pos = getDelimiterPosition(line); - return new String[]{line.substring(0, pos), line.substring(pos + 1)}; + // file is FlatFileStore + if (line.endsWith("}")) { + int pos = getDelimiterPosition(line); + return new String[]{line.substring(0, pos), line.substring(pos + 1)}; + } else { + // file is IncrementalFlatFileStore + List<Integer> positions = getDelimiterPositions(line); + checkState(positions.size() >= 2, "Invalid path entry [%s]", line); + // there are 4 parts in incrementalFFS and default delimiter is | + // path|nodeData|checkpoint|operand + // Node's data can itself have many | so we split based on first and last 2 | + String[] parts = new String[4]; + parts[0] = line.substring(0, positions.get(0)); + parts[1] = line.substring(positions.get(0) + 1, positions.get(positions.size() - 2)); + parts[2] = line.substring(positions.get(positions.size() - 2) + 1, positions.get(positions.size() - 1)); + parts[3] = line.substring(positions.get(positions.size() - 1) + 1); Review Comment: same here parts[3] = line.substring(positions.get(2) + 1); (I think) ########## oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java: ########## @@ -244,73 +255,83 @@ public FlatFileStore build() throws IOException, CompositeException { } public List<FlatFileStore> buildList(IndexHelper indexHelper, IndexerSupport indexerSupport, - Set<IndexDefinition> indexDefinitions) throws IOException, CompositeException { + Set<IndexDefinition> indexDefinitions) throws IOException, CompositeException { logFlags(); comparator = new PathElementComparator(preferredPathElements); entryWriter = new NodeStateEntryWriter(blobStore); - List<File> fileList = createdSortedStoreFiles(); - + Pair<List<File>, List<File>> pair = createdSortedStoreFiles(); + List<File> fileList = pair.getKey(); + File metadataFile = pair.getValue() == null || pair.getValue().size() == 0 ? null : pair.getValue().get(0); + long start = System.currentTimeMillis(); // If not already split, split otherwise skip splitting if (!fileList.stream().allMatch(FlatFileSplitter.IS_SPLIT)) { NodeStore nodeStore = new MemoryNodeStore(indexerSupport.retrieveNodeStateForCheckpoint()); FlatFileSplitter splitter = new FlatFileSplitter(fileList.get(0), indexHelper.getWorkDir(), - new NodeStateNodeTypeInfoProvider(nodeStore.getRoot()), new NodeStateEntryReader(blobStore), - indexDefinitions); + new NodeStateNodeTypeInfoProvider(nodeStore.getRoot()), new NodeStateEntryReader(blobStore), + indexDefinitions); fileList = splitter.split(); log.info("Split flat file to result files '{}' is done, took {} ms", fileList, System.currentTimeMillis() - start); } List<FlatFileStore> storeList = new ArrayList<>(); for (File flatFileItem : fileList) { - FlatFileStore store = new FlatFileStore(blobStore, flatFileItem, new NodeStateEntryReader(blobStore), + FlatFileStore store = new FlatFileStore(blobStore, flatFileItem, metadataFile, new NodeStateEntryReader(blobStore), unmodifiableSet(preferredPathElements), algorithm); storeList.add(store); } return storeList; } /** - * Returns the existing list of store files if it can read from system property OAK_INDEXER_SORTED_FILE_PATH which - * defines the existing folder where the flat file store files are present. Will throw an exception if it cannot + * Returns the existing list of store files if it can read from system property OAK_INDEXER_SORTED_FILE_PATH which + * defines the existing folder where the flat file store files are present. Will throw an exception if it cannot * read or the path in the system property is not a directory. - * If the system property OAK_INDEXER_SORTED_FILE_PATH in undefined, or it cannot read relevant files it + * If the system property OAK_INDEXER_SORTED_FILE_PATH in undefined, or it cannot read relevant files it * initializes the flat file store. - * - * @return list of flat files - * @throws IOException + * + * @return pair of "list of flat files" and metadata file + * @throws IOException * @throws CompositeException */ - private List<File> createdSortedStoreFiles() throws IOException, CompositeException { + private Pair<List<File>, List<File>> createdSortedStoreFiles() throws IOException, CompositeException { // Check system property defined path String sortedFilePath = System.getProperty(OAK_INDEXER_SORTED_FILE_PATH); if (StringUtils.isNotBlank(sortedFilePath)) { File sortedDir = new File(sortedFilePath); log.info("Attempting to read from provided sorted files directory [{}] (via system property '{}')", - sortedDir.getAbsolutePath(), OAK_INDEXER_SORTED_FILE_PATH); - List<File> files = getFiles(sortedDir); - if (files != null) { - return files; + sortedDir.getAbsolutePath(), OAK_INDEXER_SORTED_FILE_PATH); + // List of storefiles, List of metadatafile + Pair<List<File>, List<File>> storeFiles = getIndexStoreFiles(sortedDir); + if (storeFiles != null) { + return storeFiles; } } // Initialize the flat file store again createStoreDir(); - SortStrategy strategy = createSortStrategy(flatFileStoreDir); + IndexStoreSortStrategy strategy = createSortStrategy(flatFileStoreDir); File result = strategy.createSortedStoreFile(); + File metadata = strategy.createMetadataFile(); entryCount = strategy.getEntryCount(); - return Collections.singletonList(result); + return new ImmutablePair<>(Collections.singletonList(result), Collections.singletonList(metadata)); } @Nullable - private List<File> getFiles(File sortedDir) { + private Pair<List<File>, List<File>> getIndexStoreFiles(File sortedDir) { Review Comment: I'm also not quite sure... If we have one metafile per store file (?) then we could also use List<Pair<File, File>>. But a separate class might be even better. ########## oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java: ########## @@ -118,14 +122,53 @@ public static String getPath(String entryLine) { return entryLine.substring(0, getDelimiterPosition(entryLine)); } + /* + Currently we only have 2 types of store: + 1. FlatFileStore + 2. IncrementalFlatFileStore + */ public static String[] getParts(String line) { - int pos = getDelimiterPosition(line); - return new String[]{line.substring(0, pos), line.substring(pos + 1)}; + // file is FlatFileStore + if (line.endsWith("}")) { + int pos = getDelimiterPosition(line); + return new String[]{line.substring(0, pos), line.substring(pos + 1)}; + } else { + // file is IncrementalFlatFileStore + List<Integer> positions = getDelimiterPositions(line); + checkState(positions.size() >= 2, "Invalid path entry [%s]", line); + // there are 4 parts in incrementalFFS and default delimiter is | + // path|nodeData|checkpoint|operand + // Node's data can itself have many | so we split based on first and last 2 | + String[] parts = new String[4]; + parts[0] = line.substring(0, positions.get(0)); + parts[1] = line.substring(positions.get(0) + 1, positions.get(positions.size() - 2)); + parts[2] = line.substring(positions.get(positions.size() - 2) + 1, positions.get(positions.size() - 1)); + parts[3] = line.substring(positions.get(positions.size() - 1) + 1); + return parts; + } } private static int getDelimiterPosition(String entryLine) { int indexOfPipe = entryLine.indexOf(NodeStateEntryWriter.DELIMITER); checkState(indexOfPipe > 0, "Invalid path entry [%s]", entryLine); return indexOfPipe; } + + private static List<Integer> getDelimiterPositions(String entryLine) { Review Comment: I don't actually like the functional approach here. For me, the loop is easier to debug. ########## oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileNodeStoreBuilder.java: ########## @@ -319,23 +340,26 @@ private List<File> getFiles(File sortedDir) { return null; Review Comment: (For me, returning null is fine... My experience is that changing existing code is not worth the trouble.) ########## oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java: ########## @@ -118,14 +122,53 @@ public static String getPath(String entryLine) { return entryLine.substring(0, getDelimiterPosition(entryLine)); } + /* + Currently we only have 2 types of store: + 1. FlatFileStore + 2. IncrementalFlatFileStore + */ public static String[] getParts(String line) { - int pos = getDelimiterPosition(line); - return new String[]{line.substring(0, pos), line.substring(pos + 1)}; + // file is FlatFileStore + if (line.endsWith("}")) { + int pos = getDelimiterPosition(line); + return new String[]{line.substring(0, pos), line.substring(pos + 1)}; + } else { + // file is IncrementalFlatFileStore + List<Integer> positions = getDelimiterPositions(line); + checkState(positions.size() >= 2, "Invalid path entry [%s]", line); + // there are 4 parts in incrementalFFS and default delimiter is | + // path|nodeData|checkpoint|operand + // Node's data can itself have many | so we split based on first and last 2 | + String[] parts = new String[4]; + parts[0] = line.substring(0, positions.get(0)); + parts[1] = line.substring(positions.get(0) + 1, positions.get(positions.size() - 2)); Review Comment: Why not use the simpler line.substring(positions.get(0) + 1, positions.get(1)); ? I think it should be equivalent. ########## oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/NodeStateEntryWriter.java: ########## @@ -118,14 +122,53 @@ public static String getPath(String entryLine) { return entryLine.substring(0, getDelimiterPosition(entryLine)); } + /* + Currently we only have 2 types of store: + 1. FlatFileStore + 2. IncrementalFlatFileStore + */ public static String[] getParts(String line) { - int pos = getDelimiterPosition(line); - return new String[]{line.substring(0, pos), line.substring(pos + 1)}; + // file is FlatFileStore + if (line.endsWith("}")) { + int pos = getDelimiterPosition(line); + return new String[]{line.substring(0, pos), line.substring(pos + 1)}; + } else { + // file is IncrementalFlatFileStore + List<Integer> positions = getDelimiterPositions(line); + checkState(positions.size() >= 2, "Invalid path entry [%s]", line); + // there are 4 parts in incrementalFFS and default delimiter is | + // path|nodeData|checkpoint|operand + // Node's data can itself have many | so we split based on first and last 2 | + String[] parts = new String[4]; + parts[0] = line.substring(0, positions.get(0)); + parts[1] = line.substring(positions.get(0) + 1, positions.get(positions.size() - 2)); + parts[2] = line.substring(positions.get(positions.size() - 2) + 1, positions.get(positions.size() - 1)); + parts[3] = line.substring(positions.get(positions.size() - 1) + 1); + return parts; + } } private static int getDelimiterPosition(String entryLine) { int indexOfPipe = entryLine.indexOf(NodeStateEntryWriter.DELIMITER); checkState(indexOfPipe > 0, "Invalid path entry [%s]", entryLine); return indexOfPipe; } + + private static List<Integer> getDelimiterPositions(String entryLine) { + List<Integer> indexPositions = new LinkedList<>(); + int index = 0; + int indexOfPipe; + while (true) { + indexOfPipe = entryLine.indexOf(NodeStateEntryWriter.DELIMITER, index); + if (indexOfPipe > 0) { Review Comment: Actually, it should be ">= 0" -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@jackrabbit.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org