Author: catholicon
Date: Tue Jun 6 06:44:55 2017
New Revision: 1797738
URL: http://svn.apache.org/viewvc?rev=1797738&view=rev
Log:
OAK-2808: Active deletion of 'deleted' Lucene index files from DataStore
without relying on full scale Blob GC
Review comments:
* Use blob.getContentIdentity()
* Add javadocs
* refactor setting up external directory factory
Minor cleanup
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakDirectory.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java?rev=1797738&r1=1797737&r2=1797738&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditorProvider.java
Tue Jun 6 06:44:55 2017
@@ -68,7 +68,6 @@ public class LuceneIndexEditorProvider i
private final ActiveDeletedBlobCollector activeDeletedBlobCollector;
private GarbageCollectableBlobStore blobStore;
private IndexingQueue indexingQueue;
- private DirectoryFactory externallyProvidedDirectoryFactory;
/**
* Number of indexed Lucene document that can be held in memory
@@ -131,7 +130,7 @@ public class LuceneIndexEditorProvider i
IndexingContext indexingContext =
((ContextAwareCallback)callback).getIndexingContext();
BlobDeletionCallback blobDeletionCallback =
activeDeletedBlobCollector.getBlobDeletionCallback();
indexingContext.registerIndexCommitCallback(blobDeletionCallback);
- indexWriterFactory = new
DefaultIndexWriterFactory(mountInfoProvider,
getDirectoryFactory(blobDeletionCallback));
+ indexWriterFactory = new
DefaultIndexWriterFactory(mountInfoProvider,
newDirectoryFactory(blobDeletionCallback));
LuceneIndexWriterFactory writerFactory = indexWriterFactory;
IndexDefinition indexDefinition = null;
boolean asyncIndexing = true;
@@ -203,16 +202,8 @@ public class LuceneIndexEditorProvider i
this.inMemoryDocsLimit = inMemoryDocsLimit;
}
- public void setDirectoryFactory(DirectoryFactory directoryFactory) {
- this.externallyProvidedDirectoryFactory = directoryFactory;
- }
-
- private DirectoryFactory getDirectoryFactory(BlobDeletionCallback
blobDeletionCallback) {
- if (externallyProvidedDirectoryFactory == null) {
- return new DefaultDirectoryFactory(indexCopier, blobStore,
blobDeletionCallback);
- } else {
- return externallyProvidedDirectoryFactory;
- }
+ protected DirectoryFactory newDirectoryFactory(BlobDeletionCallback
blobDeletionCallback) {
+ return new DefaultDirectoryFactory(indexCopier, blobStore,
blobDeletionCallback);
}
private LuceneDocumentHolder getDocumentHolder(CommitContext
commitContext){
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakDirectory.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakDirectory.java?rev=1797738&r1=1797737&r2=1797738&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakDirectory.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakDirectory.java
Tue Jun 6 06:44:55 2017
@@ -163,7 +163,13 @@ public class OakDirectory extends Direct
if (property != null) {
if (property.getType() == BINARIES || property.getType() ==
BINARY) {
for (Blob b : property.getValue(BINARIES)) {
- blobDeletionCallback.deleted(b.toString(),
+ //Mark the blob as deleted. Also, post index path, type of
directory
+ //(:suggest, :data, etc) and filename being deleted
+ String blobId = b.getContentIdentity();
+ if (blobId == null) {
+ blobId = b.toString();
+ }
+ blobDeletionCallback.deleted(blobId,
Lists.newArrayList(definition.getIndexPath(),
dataNodeName, name));
}
}
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java?rev=1797738&r1=1797737&r2=1797738&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
Tue Jun 6 06:44:55 2017
@@ -52,10 +52,12 @@ import static com.google.common.base.Pre
public class ActiveDeletedBlobCollectorFactory {
public interface ActiveDeletedBlobCollector {
+ /**
+ * @return an instance of {@link BlobDeletionCallback} that can be
used to track deleted blobs
+ */
BlobDeletionCallback getBlobDeletionCallback();
void purgeBlobsDeleted(long before, GarbageCollectableBlobStore
blobStore);
}
-// LOG.info("Added {} to delete.", info);
public static ActiveDeletedBlobCollector NOOP = new
ActiveDeletedBlobCollector() {
@Override
@@ -70,6 +72,14 @@ public class ActiveDeletedBlobCollectorF
};
public interface BlobDeletionCallback extends IndexCommitCallback {
+ /**
+ * Tracks deleted blobs. From the point of view of this interface, blobId is an
opaque string
+ * that needs to be tracked.
+ * @param blobId blobId representing deleted blob. In theory, it has
nothing to do with
+ * blobs though.
+ * @param ids Information that can be useful for debugging - this is
not used for purging
+ * blobs.
+ */
void deleted(String blobId, Iterable<String> ids);
BlobDeletionCallback NOOP = new BlobDeletionCallback() {
@Override
@@ -143,6 +153,11 @@ public class ActiveDeletedBlobCollectorF
this.deletedBlobsFileWriter = new DeletedBlobsFileWriter();
}
+ /**
+ * Purges blobs from blob-store which were tracked earlier to be deleted.
+ * @param before only purge blobs which were deleted before this
timestamp
+ * @param blobStore
+ */
public void purgeBlobsDeleted(long before, @Nonnull
GarbageCollectableBlobStore blobStore) {
long numBlobsDeleted = 0;
long numChunksDeleted = 0;
@@ -181,13 +196,15 @@ public class ActiveDeletedBlobCollectorF
break;
}
- long deleted = blobStore.countDeleteChunks(
-
Lists.newArrayList(blobStore.resolveChunks(deletedBlobId)), 0);
- if (deleted < 1) {
- LOG.warn("Blob {} in file {} not
deleted", deletedBlobId, deletedBlobListFile);
- } else {
- numBlobsDeleted++;
- numChunksDeleted += deleted;
+ List<String> chunkIds =
Lists.newArrayList(blobStore.resolveChunks(deletedBlobId));
+ if (chunkIds.size() > 0) {
+ long deleted =
blobStore.countDeleteChunks(chunkIds, 0);
+ if (deleted < 1) {
+ LOG.warn("Blob {} in file {} not
deleted", deletedBlobId, deletedBlobListFile);
+ } else {
+ numBlobsDeleted++;
+ numChunksDeleted += deleted;
+ }
}
} catch (NumberFormatException nfe) {
LOG.warn("Couldn't parse blobTimestamp(" +
parsedDeletedBlobIdLine[1] +
@@ -333,6 +350,10 @@ public class ActiveDeletedBlobCollectorF
}
}
+ /**
+ * This implementation tracks deleted blobs and then passes them
on to
+ * {@link ActiveDeletedBlobCollectorImpl} on a successful commit
+ */
private class DeletedBlobCollector implements BlobDeletionCallback {
List<BlobIdInfoStruct> deletedBlobs = new ArrayList<>();
Modified:
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java?rev=1797738&r1=1797737&r2=1797738&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java
(original)
+++
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/index/LuceneIndexHelper.java
Tue Jun 6 06:44:55 2017
@@ -28,6 +28,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.jackrabbit.oak.plugins.index.lucene.ExtractedTextCache;
import org.apache.jackrabbit.oak.plugins.index.lucene.IndexCopier;
import
org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditorProvider;
+import
org.apache.jackrabbit.oak.plugins.index.lucene.directory.ActiveDeletedBlobCollectorFactory.BlobDeletionCallback;
import
org.apache.jackrabbit.oak.plugins.index.lucene.directory.DirectoryFactory;
import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
@@ -44,21 +45,32 @@ class LuceneIndexHelper implements Close
}
public LuceneIndexEditorProvider createEditorProvider() throws IOException
{
- LuceneIndexEditorProvider editor = new LuceneIndexEditorProvider(
- getIndexCopier(),
- textCache,
- null,
- indexHelper.getMountInfoProvider()
- );
+ LuceneIndexEditorProvider editor;
+ if (directoryFactory != null) {
+ editor = new LuceneIndexEditorProvider(
+ getIndexCopier(),
+ textCache,
+ null,
+ indexHelper.getMountInfoProvider()
+ ) {
+ @Override
+ protected DirectoryFactory
newDirectoryFactory(BlobDeletionCallback blobDeletionCallback) {
+ return directoryFactory;
+ }
+ };
+ } else {
+ editor = new LuceneIndexEditorProvider(
+ getIndexCopier(),
+ textCache,
+ null,
+ indexHelper.getMountInfoProvider()
+ );
+ }
if (indexHelper.getBlobStore() instanceof GarbageCollectableBlobStore)
{
editor.setBlobStore((GarbageCollectableBlobStore)
indexHelper.getBlobStore());
}
- if (directoryFactory != null) {
- editor.setDirectoryFactory(directoryFactory);
- }
-
return editor;
}