Author: mduerig
Date: Wed Apr 27 14:59:37 2016
New Revision: 1741276

URL: http://svn.apache.org/viewvc?rev=1741276&view=rev
Log:
OAK-4284: Garbage left behind when compaction does not succeed
Run cleanup directly after failed compaction passing a predicate for cleaning 
the segment generation created by the compactor

Modified:
    
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
    
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java

Modified: 
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java?rev=1741276&r1=1741275&r2=1741276&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
 Wed Apr 27 14:59:37 2016
@@ -62,6 +62,7 @@ import java.util.regex.Pattern;
 
 import javax.annotation.Nonnull;
 
+import com.google.common.base.Predicate;
 import com.google.common.base.Predicates;
 import com.google.common.base.Stopwatch;
 import com.google.common.base.Supplier;
@@ -607,10 +608,10 @@ public class FileStore implements Segmen
         boolean compacted = false;
 
         int gainThreshold = gcOptions.getGainThreshold();
-        boolean runCompaction = true;
+        boolean sufficientEstimatedGain = true;
         if (gainThreshold <= 0) {
-            gcMonitor.info("TarMK GC #{}: estimation skipped because gain 
threshold value ({} <= 0)", GC_COUNT,
-                gainThreshold);
+            gcMonitor.info("TarMK GC #{}: estimation skipped because gain 
threshold value ({} <= 0)",
+                    GC_COUNT, gainThreshold);
         } else if (gcOptions.isPaused()) {
             gcMonitor.info("TarMK GC #{}: estimation skipped because 
compaction is paused", GC_COUNT);
         } else {
@@ -623,8 +624,8 @@ public class FileStore implements Segmen
             }
 
             long gain = estimate.estimateCompactionGain();
-            runCompaction = gain >= gainThreshold;
-            if (runCompaction) {
+            sufficientEstimatedGain = gain >= gainThreshold;
+            if (sufficientEstimatedGain) {
                 gcMonitor.info(
                     "TarMK GC #{}: estimation completed in {} ({} ms). " +
                     "Gain is {}% or {}/{} ({}/{} bytes), so running 
compaction",
@@ -648,17 +649,16 @@ public class FileStore implements Segmen
             }
         }
 
-        if (runCompaction) {
+        if (sufficientEstimatedGain) {
             if (!gcOptions.isPaused()) {
-                compact();
+                if (compact()) {
+                    cleanupNeeded.set(cleanup);
+                }
                 compacted = true;
             } else {
                 gcMonitor.skipped("TarMK GC #{}: compaction paused", GC_COUNT);
             }
         }
-        if (cleanup) {
-            cleanupNeeded.set(!gcOptions.isPaused());
-        }
         return compacted;
     }
 
@@ -867,6 +867,17 @@ public class FileStore implements Segmen
      * skipping the reclaimed segments.
      */
     public List<File> cleanup() throws IOException {
+        return cleanup(new Predicate<Integer>() {
+            // FIXME OAK-4282: Make the number of retained gc generation 
configurable
+            final int retainGeneration = getGcGen() - 1;
+            @Override
+            public boolean apply(Integer generation) {
+                return generation < retainGeneration;
+            }
+        });
+    }
+
+    private List<File> cleanup(Predicate<Integer> reclaimGeneration) throws 
IOException {
         Stopwatch watch = Stopwatch.createStarted();
         long initialSize = size();
         Set<UUID> bulkRefs = newHashSet();
@@ -896,11 +907,9 @@ public class FileStore implements Segmen
             fileStoreLock.writeLock().unlock();
         }
 
-        // FIXME OAK-4282: Make the number of retained gc generation 
configurable
-        int generation = getGcGen() - 1;
         Set<UUID> reclaim = newHashSet();
         for (TarReader reader : cleaned.keySet()) {
-            reader.mark(bulkRefs, reclaim, generation);
+            reader.mark(bulkRefs, reclaim, reclaimGeneration);
             // FIXME OAK-4165: Too verbose logging during revision gc
             log.info("Size of bulk references/reclaim set {}/{}", 
bulkRefs.size(), reclaim.size());
             if (shutdown) {
@@ -1053,8 +1062,9 @@ public class FileStore implements Segmen
      * Copy every referenced record in data (non-bulk) segments. Bulk segments
      * are fully kept (they are only removed in cleanup, if there is no
      * reference to them).
+     * @return {@code true} if compaction succeeded, {@code false} otherwise.
      */
-    public void compact() throws IOException {
+    public boolean compact() throws IOException {
         gcMonitor.info("TarMK GC #{}: compaction started, gc options={}", 
GC_COUNT, gcOptions);
         Stopwatch watch = Stopwatch.createStarted();
 
@@ -1065,7 +1075,7 @@ public class FileStore implements Segmen
         // FIXME OAK-4280: Compaction cannot be cancelled
         // FIXME OAK-4279: Rework offline compaction
         // This way of compacting has not progress logging and cannot be 
cancelled
-        int gcGeneration = tracker.getGcGen() + 1;
+        final int gcGeneration = tracker.getGcGen() + 1;
         SegmentWriter writer = new SegmentWriter(this, 
tracker.getSegmentVersion(),
             new SegmentBufferWriter(this, tracker.getSegmentVersion(), "c", 
gcGeneration),
             new RecordCache<String>() {
@@ -1120,20 +1130,29 @@ public class FileStore implements Segmen
                     gcMonitor.info("TarMK GC #{}: compaction force compacting 
remaining commits", GC_COUNT);
                     if (!forceCompact(writer)) {
                         gcMonitor.warn("TarMK GC #{}: compaction failed to 
force compact remaining commits. " +
-                            "Most likely compaction didn't get exclusive 
access to the store.", GC_COUNT);
+                            "Most likely compaction didn't get exclusive 
access to the store. Cleaning up.",
+                            GC_COUNT);
+                        cleanup(new Predicate<Integer>() {
+                            @Override
+                            public boolean apply(Integer generation) {
+                                return generation == gcGeneration;
+                            }
+                        });
+                        return false;
                     }
                 }
-                // FIXME OAK-4284: Garbage left behind when compaction does 
not succeed
-                // Giving up leaves garbage that will only be cleaned up 2 
generations later!
             }
 
             gcMonitor.info("TarMK GC #{}: compaction completed in {} ({} ms), 
after {} cycles",
                     GC_COUNT, watch, watch.elapsed(MILLISECONDS), cycles - 1);
+            return true;
         } catch (InterruptedException e) {
             gcMonitor.error("TarMK GC #" + GC_COUNT + ": compaction 
interrupted", e);
             currentThread().interrupt();
+            return false;
         } catch (Exception e) {
             gcMonitor.error("TarMK GC #" + GC_COUNT + ": compaction 
encountered an error", e);
+            return false;
         }
     }
 
@@ -1547,7 +1566,7 @@ public class FileStore implements Segmen
         }
 
         @Override
-        public void compact() {
+        public boolean compact() {
             throw new UnsupportedOperationException("Read Only Store");
         }
 

Modified: 
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java?rev=1741276&r1=1741275&r2=1741276&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java
 Wed Apr 27 14:59:37 2016
@@ -53,6 +53,7 @@ import java.util.zip.CRC32;
 
 import javax.annotation.Nonnull;
 
+import com.google.common.base.Predicate;
 import org.apache.commons.io.FileUtils;
 import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
 import org.apache.jackrabbit.oak.segment.SegmentGraph.SegmentGraphVisitor;
@@ -743,14 +744,27 @@ class TarReader implements Closeable {
         }
     }
 
-    void mark(Set<UUID> bulkRefs, Set<UUID> reclaim, int generation) throws 
IOException {
+    /**
+     * Collect reclaimable segments.
+     * A data segment is reclaimable iff its generation is in the {@code 
reclaimGeneration}
+     * predicate.
+     * A bulk segment is reclaimable if it is in {@code bulkRefs} or if it is 
transitively
+     * reachable through a non reclaimable data segment.
+     *
+     * @param bulkRefs  bulk segment gc roots
+     * @param reclaim   reclaimable segments
+     * @param reclaimGeneration  reclaim generation predicate for data segments
+     * @throws IOException
+     */
+    void mark(Set<UUID> bulkRefs, Set<UUID> reclaim, Predicate<Integer> 
reclaimGeneration)
+    throws IOException {
         Map<UUID, List<UUID>> graph = getGraph(true);
         TarEntry[] entries = getEntries();
         for (int i = entries.length - 1; i >= 0; i--) {
             TarEntry entry = entries[i];
             UUID id = new UUID(entry.msb(), entry.lsb());
             if ((!isDataSegmentId(entry.lsb()) && !bulkRefs.remove(id)) ||
-                (isDataSegmentId(entry.lsb()) && entry.generation() < 
generation)) {
+                (isDataSegmentId(entry.lsb()) && 
reclaimGeneration.apply(entry.generation()))) {
                 // non referenced bulk segment or old data segment
                 reclaim.add(id);
             } else {


Reply via email to