Author: catholicon
Date: Tue Jun 6 06:45:36 2017
New Revision: 1797740
URL: http://svn.apache.org/viewvc?rev=1797740&view=rev
Log:
OAK-2808: Active deletion of 'deleted' Lucene index files from DataStore
without relying on full scale Blob GC
Set up scheduling of the purge. Points to note:
* By default, the feature is disabled (scheduler interval = -1)
* Only blobs that were marked as deleted before
Math.min(oldest_checkpoint_timestamp,
curr_time - Long.getLong("oak.active.deletion.minAge") ) are purged
(minAge is given in seconds and defaults to 24 hours)
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java?rev=1797740&r1=1797739&r2=1797740&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
Tue Jun 6 06:45:36 2017
@@ -34,6 +34,7 @@ import java.util.concurrent.atomic.Atomi
import javax.annotation.Nonnull;
import javax.management.NotCompliantMBeanException;
+import javax.management.openmbean.CompositeData;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
@@ -47,6 +48,7 @@ import org.apache.felix.scr.annotations.
import org.apache.felix.scr.annotations.ReferencePolicy;
import org.apache.felix.scr.annotations.ReferencePolicyOption;
import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
+import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
import org.apache.jackrabbit.oak.cache.CacheStats;
import org.apache.jackrabbit.oak.commons.PropertiesUtil;
import org.apache.jackrabbit.oak.osgi.OsgiWhiteboard;
@@ -75,6 +77,7 @@ import org.apache.jackrabbit.oak.spi.que
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.apache.jackrabbit.oak.spi.whiteboard.Registration;
import org.apache.jackrabbit.oak.spi.whiteboard.Whiteboard;
+import org.apache.jackrabbit.oak.stats.Clock;
import org.apache.jackrabbit.oak.stats.StatisticsProvider;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -90,6 +93,7 @@ import static com.google.common.base.Pre
import static java.util.Collections.emptyMap;
import static org.apache.commons.io.FileUtils.ONE_MB;
import static
org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.registerMBean;
+import static
org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.scheduleWithFixedDelay;
@SuppressWarnings("UnusedDeclaration")
@Component(metatype = true, label = "Apache Jackrabbit Oak
LuceneIndexProvider")
@@ -233,15 +237,22 @@ public class LuceneIndexProviderService
)
private static final String PROP_DISABLE_STORED_INDEX_DEFINITION =
"disableStoredIndexDefinition";
- private static final boolean PROP_DELETED_BLOB_COLLECTION_ENABLED = false;
+ private static final int PROP_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL =
-1;
@Property(
- boolValue = PROP_DELETED_BLOB_COLLECTION_ENABLED,
- label = "Actively remove deleted index blobs from blob store",
- description = "Index blobs are explicitly unique and don't require
mark-sweek type collection." +
- "Turning this on would setup early deletion of blobs from
blob collection that are deleted" +
- " during indexing."
- )
- private static final String PROP_ENABLE_DELETED_BLOB_COLLECTION_DEFINITION
= "enableDeletedBlobsCollection";
+ intValue = PROP_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL,
+ label = "Time interval (in seconds) for actively removing deleted
index blobs from blob store",
+ description = "Index blobs are explicitly unique and don't require
mark-sweep type collection." +
+ "This is number of seconds for scheduling clean-up. -1
would disable the functionality." +
+ "Cleanup implies purging index blobs marked as deleted
earlier during some indexing cycle."
+ )
+ private static final String
PROP_NAME_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL =
"deletedBlobsCollectionInterval";
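+ /**
+ * A blob marked as deleted is actively purged only after it has stayed
+ * deleted for at least this long (in seconds). Defaults to 24 hours; can be
+ * overridden with the system property "oak.active.deletion.minAge".
+ */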
+ final long MIN_BLOB_AGE_TO_ACTIVELY_DELETE =
Long.getLong("oak.active.deletion.minAge",
+ TimeUnit.HOURS.toSeconds(24));
+
+ private final Clock clock = Clock.SIMPLE;
private Whiteboard whiteboard;
@@ -282,6 +293,9 @@ public class LuceneIndexProviderService
)
private GarbageCollectableBlobStore blobStore;
+ @Reference
+ private CheckpointMBean checkpointMBean;
+
private IndexCopier indexCopier;
private ActiveDeletedBlobCollectorFactory.ActiveDeletedBlobCollector
activeDeletedBlobCollector;
@@ -323,15 +337,7 @@ public class LuceneIndexProviderService
initializeExtractedTextCache(bundleContext, config);
IndexTracker tracker = createTracker(bundleContext, config);
indexProvider = new LuceneIndexProvider(tracker, scorerFactory,
augmentorFactory);
- if (PROP_DELETED_BLOB_COLLECTION_ENABLED && blobStore != null) {
- File blobCollectorWorkingDir = new File(indexDir, "deleted-blobs");
- activeDeletedBlobCollector =
ActiveDeletedBlobCollectorFactory.newInstance(blobCollectorWorkingDir,
executorService);
- log.info("Active blob collector initialized at working dir: {}",
blobCollectorWorkingDir);
- } else {
- activeDeletedBlobCollector =
ActiveDeletedBlobCollectorFactory.NOOP;
- log.info("Active blob collector set to NOOP. Enable? {};
blobStore: {}",
- PROP_DELETED_BLOB_COLLECTION_ENABLED, blobStore);
- }
+ initializeActiveBlobCollector(whiteboard, config);
initializeLogging(config);
initialize();
@@ -703,6 +709,53 @@ public class LuceneIndexProviderService
regs.add(bundleContext.registerService(IndexInfoProvider.class.getName(),
infoProvider, null));
}
+    /**
+     * Initializes {@code activeDeletedBlobCollector} and, when the feature is
+     * enabled, schedules the periodic purge of deleted index blobs.
+     *
+     * The feature is enabled only when the configured interval is greater than
+     * -1 and a blob store is bound; otherwise the collector is set to
+     * {@code ActiveDeletedBlobCollectorFactory.NOOP} and nothing is scheduled.
+     *
+     * @param whiteboard whiteboard on which the purge task is scheduled
+     * @param config component configuration; "deletedBlobsCollectionInterval"
+     *               is read from it as the purge interval in seconds
+     *               (default -1)
+     */
+    private void initializeActiveBlobCollector(Whiteboard whiteboard, Map<String, ?> config) {
+        int activeDeletionInterval = PropertiesUtil.toInteger(
+                config.get(PROP_NAME_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL),
+                PROP_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL);
+        if (activeDeletionInterval > -1 && blobStore != null) {
+            File blobCollectorWorkingDir = new File(indexDir, "deleted-blobs");
+            activeDeletedBlobCollector = ActiveDeletedBlobCollectorFactory.newInstance(
+                    blobCollectorWorkingDir, executorService);
+            // The cut-off timestamp is re-computed on every run so that it
+            // follows the clock and the current oldest checkpoint. The
+            // returned registration is kept in oakRegs.
+            oakRegs.add(
+                    scheduleWithFixedDelay(whiteboard, () ->
+                                    activeDeletedBlobCollector.purgeBlobsDeleted(
+                                            getSafeTimestampForDeletedBlobs(checkpointMBean),
+                                            blobStore),
+                            activeDeletionInterval));
+
+            // Note the trailing space after "seconds;" - without it the two
+            // concatenated literals ran together as "seconds;minAge".
+            log.info("Active blob collector initialized at working dir: {}; deletion interval {} seconds; " +
+                            "minAge: {}",
+                    blobCollectorWorkingDir, activeDeletionInterval, MIN_BLOB_AGE_TO_ACTIVELY_DELETE);
+        } else {
+            activeDeletedBlobCollector = ActiveDeletedBlobCollectorFactory.NOOP;
+            log.info("Active blob collector set to NOOP. deletionInterval: {} seconds; blobStore: {}",
+                    activeDeletionInterval, blobStore);
+        }
+    }
+
+    /**
+     * Computes the cut-off timestamp passed to the active blob purge.
+     *
+     * The result is the current clock time minus
+     * {@code MIN_BLOB_AGE_TO_ACTIVELY_DELETE} (seconds, converted to
+     * milliseconds), lowered to the oldest checkpoint timestamp when that one
+     * is smaller. If the checkpoint MBean yields no usable "timestamp" item,
+     * a warning is logged and the age-based value is returned unchanged.
+     *
+     * @param checkpointMBean source of the oldest checkpoint creation time
+     * @return the cut-off timestamp
+     */
+    private long getSafeTimestampForDeletedBlobs(CheckpointMBean checkpointMBean) {
+        long timestamp = clock.getTime() - TimeUnit.SECONDS.toMillis(MIN_BLOB_AGE_TO_ACTIVELY_DELETE);
+
+        CompositeData data = checkpointMBean.getOldestCheckpointCreationTime();
+
+        // Guard both failure modes of CompositeData.get(): a null composite
+        // (NullPointerException) and a composite without a "timestamp" item
+        // (InvalidKeyException). Neither would be caught by the
+        // NumberFormatException handler below.
+        Object timestampObj = null;
+        if (data != null && data.containsKey("timestamp")) {
+            timestampObj = data.get("timestamp");
+        }
+        if (timestampObj == null) {
+            log.warn("Couldn't find timestamp in checkpoint mbean output: {}", data);
+            return timestamp;
+        }
+
+        try {
+            // NOTE(review): assumes the MBean reports the timestamp in the same
+            // unit as clock.getTime() (presumably milliseconds) - confirm.
+            long minCheckpointTimestamp = Long.parseLong(timestampObj.toString());
+            if (minCheckpointTimestamp < timestamp) {
+                log.info("Oldest checkpoint time data ({}) is older than buffer period for deleted blobs." +
+                        " Using that instead", data);
+                timestamp = minCheckpointTimestamp;
+            }
+        } catch (NumberFormatException nfe) {
+            log.warn("Couldn't find timestamp in checkpoint mbean output: {}", data);
+        }
+
+        return timestamp;
+    }
+
protected void bindNodeAggregator(NodeAggregator aggregator) {
this.nodeAggregator = aggregator;
initialize();
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java?rev=1797740&r1=1797739&r2=1797740&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
Tue Jun 6 06:45:36 2017
@@ -115,7 +115,6 @@ public class ActiveDeletedBlobCollectorF
* due deleted blob
*/
static class ActiveDeletedBlobCollectorImpl implements
ActiveDeletedBlobCollector {
-
private static PerfLogger PERF_LOG = new PerfLogger(
LoggerFactory.getLogger(ActiveDeletedBlobCollectorImpl.class.getName() +
".perf"));
private static Logger LOG =
LoggerFactory.getLogger(ActiveDeletedBlobCollectorImpl.class.getName());
Modified:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java?rev=1797740&r1=1797739&r2=1797740&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
Tue Jun 6 06:45:36 2017
@@ -35,6 +35,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.reflect.FieldUtils;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
+import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
import org.apache.jackrabbit.oak.plugins.blob.datastore.CachingFileDataStore;
import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreUtils;
@@ -88,6 +89,7 @@ public class LuceneIndexProviderServiceT
context.registerService(NodeStore.class, new MemoryNodeStore());
context.registerService(IndexPathService.class,
mock(IndexPathService.class));
context.registerService(AsyncIndexInfoService.class,
mock(AsyncIndexInfoService.class));
+ context.registerService(CheckpointMBean.class,
mock(CheckpointMBean.class));
MockOsgi.injectServices(service, context.bundleContext());
}