Author: mduerig
Date: Wed Apr 27 14:59:37 2016
New Revision: 1741276
URL: http://svn.apache.org/viewvc?rev=1741276&view=rev
Log:
OAK-4284: Garbage left behind when compaction does not succeed
Run cleanup directly after failed compaction passing a predicate for cleaning
the segment generation created by the compactor
Modified:
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java
Modified:
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java?rev=1741276&r1=1741275&r2=1741276&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
(original)
+++
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/FileStore.java
Wed Apr 27 14:59:37 2016
@@ -62,6 +62,7 @@ import java.util.regex.Pattern;
import javax.annotation.Nonnull;
+import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.base.Stopwatch;
import com.google.common.base.Supplier;
@@ -607,10 +608,10 @@ public class FileStore implements Segmen
boolean compacted = false;
int gainThreshold = gcOptions.getGainThreshold();
- boolean runCompaction = true;
+ boolean sufficientEstimatedGain = true;
if (gainThreshold <= 0) {
- gcMonitor.info("TarMK GC #{}: estimation skipped because gain
threshold value ({} <= 0)", GC_COUNT,
- gainThreshold);
+ gcMonitor.info("TarMK GC #{}: estimation skipped because gain
threshold value ({} <= 0)",
+ GC_COUNT, gainThreshold);
} else if (gcOptions.isPaused()) {
gcMonitor.info("TarMK GC #{}: estimation skipped because
compaction is paused", GC_COUNT);
} else {
@@ -623,8 +624,8 @@ public class FileStore implements Segmen
}
long gain = estimate.estimateCompactionGain();
- runCompaction = gain >= gainThreshold;
- if (runCompaction) {
+ sufficientEstimatedGain = gain >= gainThreshold;
+ if (sufficientEstimatedGain) {
gcMonitor.info(
"TarMK GC #{}: estimation completed in {} ({} ms). " +
"Gain is {}% or {}/{} ({}/{} bytes), so running
compaction",
@@ -648,17 +649,16 @@ public class FileStore implements Segmen
}
}
- if (runCompaction) {
+ if (sufficientEstimatedGain) {
if (!gcOptions.isPaused()) {
- compact();
+ if (compact()) {
+ cleanupNeeded.set(cleanup);
+ }
compacted = true;
} else {
gcMonitor.skipped("TarMK GC #{}: compaction paused", GC_COUNT);
}
}
- if (cleanup) {
- cleanupNeeded.set(!gcOptions.isPaused());
- }
return compacted;
}
@@ -867,6 +867,17 @@ public class FileStore implements Segmen
* skipping the reclaimed segments.
*/
public List<File> cleanup() throws IOException {
+ return cleanup(new Predicate<Integer>() {
+ // FIXME OAK-4282: Make the number of retained gc generation
configurable
+ final int retainGeneration = getGcGen() - 1;
+ @Override
+ public boolean apply(Integer generation) {
+ return generation < retainGeneration;
+ }
+ });
+ }
+
+ private List<File> cleanup(Predicate<Integer> reclaimGeneration) throws
IOException {
Stopwatch watch = Stopwatch.createStarted();
long initialSize = size();
Set<UUID> bulkRefs = newHashSet();
@@ -896,11 +907,9 @@ public class FileStore implements Segmen
fileStoreLock.writeLock().unlock();
}
- // FIXME OAK-4282: Make the number of retained gc generation
configurable
- int generation = getGcGen() - 1;
Set<UUID> reclaim = newHashSet();
for (TarReader reader : cleaned.keySet()) {
- reader.mark(bulkRefs, reclaim, generation);
+ reader.mark(bulkRefs, reclaim, reclaimGeneration);
// FIXME OAK-4165: Too verbose logging during revision gc
log.info("Size of bulk references/reclaim set {}/{}",
bulkRefs.size(), reclaim.size());
if (shutdown) {
@@ -1053,8 +1062,9 @@ public class FileStore implements Segmen
* Copy every referenced record in data (non-bulk) segments. Bulk segments
* are fully kept (they are only removed in cleanup, if there is no
* reference to them).
+ * @return {@code true} if compaction succeeded, {@code false} otherwise.
*/
- public void compact() throws IOException {
+ public boolean compact() throws IOException {
gcMonitor.info("TarMK GC #{}: compaction started, gc options={}",
GC_COUNT, gcOptions);
Stopwatch watch = Stopwatch.createStarted();
@@ -1065,7 +1075,7 @@ public class FileStore implements Segmen
// FIXME OAK-4280: Compaction cannot be cancelled
// FIXME OAK-4279: Rework offline compaction
// This way of compacting has not progress logging and cannot be
cancelled
- int gcGeneration = tracker.getGcGen() + 1;
+ final int gcGeneration = tracker.getGcGen() + 1;
SegmentWriter writer = new SegmentWriter(this,
tracker.getSegmentVersion(),
new SegmentBufferWriter(this, tracker.getSegmentVersion(), "c",
gcGeneration),
new RecordCache<String>() {
@@ -1120,20 +1130,29 @@ public class FileStore implements Segmen
gcMonitor.info("TarMK GC #{}: compaction force compacting
remaining commits", GC_COUNT);
if (!forceCompact(writer)) {
gcMonitor.warn("TarMK GC #{}: compaction failed to
force compact remaining commits. " +
- "Most likely compaction didn't get exclusive
access to the store.", GC_COUNT);
+ "Most likely compaction didn't get exclusive
access to the store. Cleaning up.",
+ GC_COUNT);
+ cleanup(new Predicate<Integer>() {
+ @Override
+ public boolean apply(Integer generation) {
+ return generation == gcGeneration;
+ }
+ });
+ return false;
}
}
- // FIXME OAK-4284: Garbage left behind when compaction does
not succeed
- // Giving up leaves garbage that will only be cleaned up 2
generations later!
}
gcMonitor.info("TarMK GC #{}: compaction completed in {} ({} ms),
after {} cycles",
GC_COUNT, watch, watch.elapsed(MILLISECONDS), cycles - 1);
+ return true;
} catch (InterruptedException e) {
gcMonitor.error("TarMK GC #" + GC_COUNT + ": compaction
interrupted", e);
currentThread().interrupt();
+ return false;
} catch (Exception e) {
gcMonitor.error("TarMK GC #" + GC_COUNT + ": compaction
encountered an error", e);
+ return false;
}
}
@@ -1547,7 +1566,7 @@ public class FileStore implements Segmen
}
@Override
- public void compact() {
+ public boolean compact() {
throw new UnsupportedOperationException("Read Only Store");
}
Modified:
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java?rev=1741276&r1=1741275&r2=1741276&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java
(original)
+++
jackrabbit/oak/trunk/oak-segment-next/src/main/java/org/apache/jackrabbit/oak/segment/file/TarReader.java
Wed Apr 27 14:59:37 2016
@@ -53,6 +53,7 @@ import java.util.zip.CRC32;
import javax.annotation.Nonnull;
+import com.google.common.base.Predicate;
import org.apache.commons.io.FileUtils;
import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
import org.apache.jackrabbit.oak.segment.SegmentGraph.SegmentGraphVisitor;
@@ -743,14 +744,27 @@ class TarReader implements Closeable {
}
}
- void mark(Set<UUID> bulkRefs, Set<UUID> reclaim, int generation) throws
IOException {
+ /**
+ * Collect reclaimable segments.
+ * A data segment is reclaimable iff its generation is in the {@code
reclaimGeneration}
+ * predicate.
+ * A bulk segment is reclaimable if it is in {@code bulkRefs} or if it is
transitively
+ * reachable through a non reclaimable data segment.
+ *
+ * @param bulkRefs bulk segment gc roots
+ * @param reclaim reclaimable segments
+ * @param reclaimGeneration reclaim generation predicate for data segments
+ * @throws IOException
+ */
+ void mark(Set<UUID> bulkRefs, Set<UUID> reclaim, Predicate<Integer>
reclaimGeneration)
+ throws IOException {
Map<UUID, List<UUID>> graph = getGraph(true);
TarEntry[] entries = getEntries();
for (int i = entries.length - 1; i >= 0; i--) {
TarEntry entry = entries[i];
UUID id = new UUID(entry.msb(), entry.lsb());
if ((!isDataSegmentId(entry.lsb()) && !bulkRefs.remove(id)) ||
- (isDataSegmentId(entry.lsb()) && entry.generation() <
generation)) {
+ (isDataSegmentId(entry.lsb()) &&
reclaimGeneration.apply(entry.generation()))) {
// non referenced bulk segment or old data segment
reclaim.add(id);
} else {