Author: catholicon
Date: Tue Jun  6 06:45:36 2017
New Revision: 1797740

URL: http://svn.apache.org/viewvc?rev=1797740&view=rev
Log:
OAK-2808: Active deletion of 'deleted' Lucene index files from DataStore 
without relying on full scale Blob GC

Set up scheduling of the purge. Points to note:
* By default, the feature is disabled (scheduler interval = -1)
* Only blobs that were marked as deleted before Math.min(oldest_checkpoint_timestamp, 
curr_time - Long.getLong("oak.active.deletion.minAge") ) are purged
  (minAge is given in seconds and defaults to 24 hours)

Modified:
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
    
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java?rev=1797740&r1=1797739&r2=1797740&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderService.java
 Tue Jun  6 06:45:36 2017
@@ -34,6 +34,7 @@ import java.util.concurrent.atomic.Atomi
 
 import javax.annotation.Nonnull;
 import javax.management.NotCompliantMBeanException;
+import javax.management.openmbean.CompositeData;
 
 import com.google.common.base.Strings;
 import com.google.common.collect.Lists;
@@ -47,6 +48,7 @@ import org.apache.felix.scr.annotations.
 import org.apache.felix.scr.annotations.ReferencePolicy;
 import org.apache.felix.scr.annotations.ReferencePolicyOption;
 import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
+import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
 import org.apache.jackrabbit.oak.cache.CacheStats;
 import org.apache.jackrabbit.oak.commons.PropertiesUtil;
 import org.apache.jackrabbit.oak.osgi.OsgiWhiteboard;
@@ -75,6 +77,7 @@ import org.apache.jackrabbit.oak.spi.que
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
 import org.apache.jackrabbit.oak.spi.whiteboard.Registration;
 import org.apache.jackrabbit.oak.spi.whiteboard.Whiteboard;
+import org.apache.jackrabbit.oak.stats.Clock;
 import org.apache.jackrabbit.oak.stats.StatisticsProvider;
 import org.apache.lucene.analysis.util.CharFilterFactory;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -90,6 +93,7 @@ import static com.google.common.base.Pre
 import static java.util.Collections.emptyMap;
 import static org.apache.commons.io.FileUtils.ONE_MB;
 import static 
org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.registerMBean;
+import static 
org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.scheduleWithFixedDelay;
 
 @SuppressWarnings("UnusedDeclaration")
 @Component(metatype = true, label = "Apache Jackrabbit Oak 
LuceneIndexProvider")
@@ -233,15 +237,22 @@ public class LuceneIndexProviderService
     )
     private static final String PROP_DISABLE_STORED_INDEX_DEFINITION = 
"disableStoredIndexDefinition";
 
-    private static final boolean PROP_DELETED_BLOB_COLLECTION_ENABLED = false;
+    private static final int PROP_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL = 
-1;
     @Property(
-            boolValue = PROP_DELETED_BLOB_COLLECTION_ENABLED,
-            label = "Actively remove deleted index blobs from blob store",
-            description = "Index blobs are explicitly unique and don't require 
mark-sweek type collection." +
-                    "Turning this on would setup early deletion of blobs from 
blob collection that are deleted" +
-                    " during indexing."
-    )
-    private static final String PROP_ENABLE_DELETED_BLOB_COLLECTION_DEFINITION 
= "enableDeletedBlobsCollection";
+            intValue = PROP_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL,
+            label = "Time interval (in seconds) for actively removing deleted 
index blobs from blob store",
+            description = "Index blobs are explicitly unique and don't require 
mark-sweep type collection." +
+                    "This is number of seconds for scheduling clean-up. -1 
would disable the functionality." +
+                    "Cleanup implies purging index blobs marked as deleted 
earlier during some indexing cycle."
+    )
+    private static final String 
PROP_NAME_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL = 
"deletedBlobsCollectionInterval";
+    /**
+     * Actively deleted blob must be deleted for at least this long (in 
seconds)
+     */
+    final long MIN_BLOB_AGE_TO_ACTIVELY_DELETE = 
Long.getLong("oak.active.deletion.minAge",
+            TimeUnit.HOURS.toSeconds(24));
+
+    private final Clock clock = Clock.SIMPLE;
 
     private Whiteboard whiteboard;
 
@@ -282,6 +293,9 @@ public class LuceneIndexProviderService
     )
     private GarbageCollectableBlobStore blobStore;
 
+    @Reference
+    private CheckpointMBean checkpointMBean;
+
     private IndexCopier indexCopier;
 
     private ActiveDeletedBlobCollectorFactory.ActiveDeletedBlobCollector 
activeDeletedBlobCollector;
@@ -323,15 +337,7 @@ public class LuceneIndexProviderService
         initializeExtractedTextCache(bundleContext, config);
         IndexTracker tracker = createTracker(bundleContext, config);
         indexProvider = new LuceneIndexProvider(tracker, scorerFactory, 
augmentorFactory);
-        if (PROP_DELETED_BLOB_COLLECTION_ENABLED && blobStore != null) {
-            File blobCollectorWorkingDir = new File(indexDir, "deleted-blobs");
-            activeDeletedBlobCollector = 
ActiveDeletedBlobCollectorFactory.newInstance(blobCollectorWorkingDir, 
executorService);
-            log.info("Active blob collector initialized at working dir: {}", 
blobCollectorWorkingDir);
-        } else {
-            activeDeletedBlobCollector = 
ActiveDeletedBlobCollectorFactory.NOOP;
-            log.info("Active blob collector set to NOOP. Enable? {}; 
blobStore: {}",
-                    PROP_DELETED_BLOB_COLLECTION_ENABLED, blobStore);
-        }
+        initializeActiveBlobCollector(whiteboard, config);
         initializeLogging(config);
         initialize();
 
@@ -703,6 +709,53 @@ public class LuceneIndexProviderService
         
regs.add(bundleContext.registerService(IndexInfoProvider.class.getName(), 
infoProvider, null));
     }
 
+    private void initializeActiveBlobCollector(Whiteboard whiteboard, 
Map<String, ?> config) {
+        int activeDeletionInterval = PropertiesUtil.toInteger(
+                config.get(PROP_NAME_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL),
+                PROP_DELETED_BLOB_COLLECTION_DEFAULT_INTERVAL);
+        if (activeDeletionInterval > -1 && blobStore!= null) {
+            File blobCollectorWorkingDir = new File(indexDir, "deleted-blobs");
+            activeDeletedBlobCollector = 
ActiveDeletedBlobCollectorFactory.newInstance(blobCollectorWorkingDir, 
executorService);
+            oakRegs.add(
+                    scheduleWithFixedDelay(whiteboard, () ->
+                                activeDeletedBlobCollector.purgeBlobsDeleted(
+                                        
getSafeTimestampForDeletedBlobs(checkpointMBean),
+                                        blobStore),
+                            activeDeletionInterval));
+
+            log.info("Active blob collector initialized at working dir: {}; 
deletion interval {} seconds;" +
+                            "minAge: {}",
+                    blobCollectorWorkingDir, activeDeletionInterval, 
MIN_BLOB_AGE_TO_ACTIVELY_DELETE);
+        } else {
+            activeDeletedBlobCollector = 
ActiveDeletedBlobCollectorFactory.NOOP;
+            log.info("Active blob collector set to NOOP. deletionInterval: {} 
seconds; blobStore: {}",
+                    activeDeletionInterval, blobStore);
+        }
+    }
+
+    private long getSafeTimestampForDeletedBlobs(CheckpointMBean 
checkpointMBean) {
+        long timestamp = clock.getTime() - 
TimeUnit.SECONDS.toMillis(MIN_BLOB_AGE_TO_ACTIVELY_DELETE);
+
+        CompositeData data = checkpointMBean.getOldestCheckpointCreationTime();
+        Object timestampObj = data.get("timestamp");
+        String timestampStr = null;
+        if (timestampObj != null) {
+            timestampStr = timestampObj.toString();
+        }
+        try {
+            long minCheckpointTimestamp = Long.parseLong(timestampStr);
+            if (minCheckpointTimestamp < timestamp) {
+                log.info("Oldest checkpoint time data ({}) is older than 
buffer period for deleted blobs." +
+                        " Using that instead", data);
+                timestamp = minCheckpointTimestamp;
+            }
+        } catch (NumberFormatException nfe) {
+            log.warn("Couldn't find timestamp in checkpoint mbean output: {}", 
data);
+        }
+
+        return timestamp;
+    }
+
     protected void bindNodeAggregator(NodeAggregator aggregator) {
         this.nodeAggregator = aggregator;
         initialize();

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java?rev=1797740&r1=1797739&r2=1797740&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/directory/ActiveDeletedBlobCollectorFactory.java
 Tue Jun  6 06:45:36 2017
@@ -115,7 +115,6 @@ public class ActiveDeletedBlobCollectorF
      * due deleted blob
      */
     static class ActiveDeletedBlobCollectorImpl implements 
ActiveDeletedBlobCollector {
-
         private static PerfLogger PERF_LOG = new PerfLogger(
                 
LoggerFactory.getLogger(ActiveDeletedBlobCollectorImpl.class.getName() + 
".perf"));
         private static Logger LOG = 
LoggerFactory.getLogger(ActiveDeletedBlobCollectorImpl.class.getName());

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java?rev=1797740&r1=1797739&r2=1797740&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexProviderServiceTest.java
 Tue Jun  6 06:45:36 2017
@@ -35,6 +35,7 @@ import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.reflect.FieldUtils;
 import org.apache.jackrabbit.oak.api.Blob;
 import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
+import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.CachingFileDataStore;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreBlobStore;
 import org.apache.jackrabbit.oak.plugins.blob.datastore.DataStoreUtils;
@@ -88,6 +89,7 @@ public class LuceneIndexProviderServiceT
         context.registerService(NodeStore.class, new MemoryNodeStore());
         context.registerService(IndexPathService.class, 
mock(IndexPathService.class));
         context.registerService(AsyncIndexInfoService.class, 
mock(AsyncIndexInfoService.class));
+        context.registerService(CheckpointMBean.class, 
mock(CheckpointMBean.class));
         MockOsgi.injectServices(service, context.bundleContext());
     }
 


Reply via email to