Author: jukka
Date: Mon Mar 31 19:09:37 2014
New Revision: 1583406

URL: http://svn.apache.org/r1583406
Log:
OAK-631: SegmentMK: Implement garbage collection

Add initial TarFile.cleanup()

Modified:
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentId.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntry.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java?rev=1583406&r1=1583405&r2=1583406&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
 Mon Mar 31 19:09:37 2014
@@ -88,7 +88,7 @@ public class Segment {
      */
     static final int MEDIUM_LIMIT = (1 << (16 - 2)) + SMALL_LIMIT;
 
-    static int REF_COUNT_OFFSET = 5;
+    public static int REF_COUNT_OFFSET = 5;
 
     static int ROOT_COUNT_OFFSET = 6;
 

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentId.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentId.java?rev=1583406&r1=1583405&r2=1583406&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentId.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentId.java
 Mon Mar 31 19:09:37 2014
@@ -23,6 +23,15 @@ import java.util.UUID;
  */
 public class SegmentId implements Comparable<SegmentId> {
 
+    /**
+     * Checks whether this is a data segment identifier.
+     *
+     * @return {@code true} for a data segment, {@code false} otherwise
+     */
+    public static boolean isDataSegmentId(long lsb) {
+        return (lsb >>> 60) == 0xAL;
+    }
+
     private final SegmentTracker tracker;
 
     private final long msb;
@@ -48,7 +57,7 @@ public class SegmentId implements Compar
      * @return {@code true} for a data segment, {@code false} otherwise
      */
     public boolean isDataSegmentId() {
-        return (lsb >>> 60) == 0xAL; 
+        return isDataSegmentId(lsb);
     }
 
     /**

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntry.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntry.java?rev=1583406&r1=1583405&r2=1583406&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntry.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarEntry.java
 Mon Mar 31 19:09:37 2014
@@ -16,17 +16,63 @@
  */
 package org.apache.jackrabbit.oak.plugins.segment.file;
 
+import java.util.Comparator;
+
 class TarEntry {
 
+    static final Comparator<TarEntry> REVERSE_OFFSET = new 
Comparator<TarEntry>() {
+        @Override
+        public int compare(TarEntry a, TarEntry b) {
+            if (a.offset > b.offset) {
+                return -1;
+            } else if (a.offset < b.offset) {
+                return 1;
+            } else {
+                return 0;
+            }
+        }
+    };
+
+    static final Comparator<TarEntry> IDENTIFIER = new Comparator<TarEntry>() {
+        @Override
+        public int compare(TarEntry a, TarEntry b) {
+            if (a.msb > b.msb) {
+                return 1;
+            } else if (a.msb < b.msb) {
+                return -1;
+            } else if (a.lsb > b.lsb) {
+                return 1;
+            } else if (a.lsb < b.lsb) {
+                return -1;
+            } else {
+                return 0;
+            }
+        }
+    };
+
+    private final long msb;
+
+    private final long lsb;
+
     private final int offset;
 
     private final int size;
 
-    TarEntry(int offset, int size) {
+    TarEntry(long msb, long lsb, int offset, int size) {
+        this.msb = msb;
+        this.lsb = lsb;
         this.offset = offset;
         this.size = size;
     }
 
+    long msb() {
+        return msb;
+    }
+
+    long lsb() {
+        return lsb;
+    }
+
     int offset() {
         return offset;
     }

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java?rev=1583406&r1=1583405&r2=1583406&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/file/TarFile.java
 Mon Mar 31 19:09:37 2014
@@ -19,16 +19,16 @@ package org.apache.jackrabbit.oak.plugin
 import static com.google.common.base.Charsets.UTF_8;
 import static com.google.common.base.Preconditions.checkState;
 import static com.google.common.collect.Maps.newConcurrentMap;
-import static com.google.common.collect.Maps.newTreeMap;
+import static com.google.common.collect.Sets.newHashSet;
+import static 
org.apache.jackrabbit.oak.plugins.segment.Segment.REF_COUNT_OFFSET;
+import static 
org.apache.jackrabbit.oak.plugins.segment.SegmentId.isDataSegmentId;
 
 import java.io.File;
 import java.io.IOException;
 import java.io.RandomAccessFile;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
-import java.util.Map;
 import java.util.Set;
-import java.util.SortedMap;
 import java.util.UUID;
 import java.util.concurrent.ConcurrentMap;
 import java.util.zip.CRC32;
@@ -108,7 +108,19 @@ class TarFile {
     }
 
     Set<UUID> getUUIDs() {
-        return entries.keySet();
+        if (entries != null) {
+            return entries.keySet();
+        } else {
+            Set<UUID> uuids = newHashSet();
+            int position = index.position();
+            while (position < index.limit()) {
+                uuids.add(new UUID(
+                        index.getLong(position),
+                        index.getLong(position + 8)));
+                position += 24;
+            }
+            return uuids;
+        }
     }
 
     ByteBuffer readEntry(long msb, long lsb) throws IOException {
@@ -142,13 +154,14 @@ class TarFile {
 
             ByteBuffer index = ByteBuffer.allocate(indexSize);
 
-            SortedMap<UUID, TarEntry> sorted = newTreeMap();
-            sorted.putAll(entries);
-            for (Map.Entry<UUID, TarEntry> entry : sorted.entrySet()) {
-                index.putLong(entry.getKey().getMostSignificantBits());
-                index.putLong(entry.getKey().getLeastSignificantBits());
-                index.putInt(entry.getValue().offset());
-                index.putInt(entry.getValue().size());
+            TarEntry[] sorted =
+                    entries.values().toArray(new TarEntry[entries.size()]);
+            Arrays.sort(sorted, TarEntry.IDENTIFIER);
+            for (TarEntry entry : sorted) {
+                index.putLong(entry.msb());
+                index.putLong(entry.lsb());
+                index.putInt(entry.offset());
+                index.putInt(entry.size());
             }
 
             CRC32 checksum = new CRC32();
@@ -167,7 +180,9 @@ class TarFile {
 
         writeEntryHeader(uuid.toString().getBytes(UTF_8), size);
         access.write(position + BLOCK_SIZE, b, offset, size);
-        entries.put(uuid, new TarEntry(position + BLOCK_SIZE, size));
+        entries.put(uuid, new TarEntry(
+                uuid.getMostSignificantBits(), uuid.getLeastSignificantBits(),
+                position + BLOCK_SIZE, size));
         position += getEntrySize(size);
 
         return true;
@@ -227,6 +242,57 @@ class TarFile {
         access.write(position, header, 0, BLOCK_SIZE);
     }
 
+    public synchronized void cleanup(Set<UUID> referencedIds)
+            throws IOException {
+        TarEntry[] sorted;
+        if (entries != null) {
+            sorted = entries.values().toArray(new TarEntry[entries.size()]);
+        } else {
+            sorted = new TarEntry[index.remaining() / 24];
+            int position = index.position();
+            for (int i = 0; position < index.limit(); i++) {
+                sorted[i]  = new TarEntry(
+                        index.getLong(position),
+                        index.getLong(position + 8),
+                        index.getInt(position + 16),
+                        index.getInt(position + 20));
+                position += 24;
+            }
+        }
+        Arrays.sort(sorted, TarEntry.REVERSE_OFFSET);
+
+        int size = 0;
+        int count = 0;
+        for (int i = 0; i < sorted.length; i++) {
+            TarEntry entry = sorted[i];
+            UUID id = new UUID(entry.msb(), entry.lsb());
+            if (!referencedIds.remove(id)) {
+                // this segment is not referenced anywhere
+                sorted[i] = null;
+            } else if (isDataSegmentId(entry.lsb())) {
+                size += getEntrySize(entry.size());
+                count += 1;
+                // this is a referenced data segment, so follow the graph
+                ByteBuffer segment = access.read(
+                        entry.offset(),
+                        Math.min(entry.size(), 16 * 256));
+                int pos = segment.position();
+                int refcount = segment.get(pos + REF_COUNT_OFFSET) & 0xff;
+                int refend = pos + 16 * (refcount + 1);
+                for (int refpos = pos + 16; refpos < refend; refpos += 16) {
+                    referencedIds.add(new UUID(
+                            segment.getLong(refpos),
+                            segment.getLong(refpos + 8)));
+                }
+            }
+        }
+
+        // check if we could free up at least 25% of space
+        if (entries == null && size < access.length() * 3 / 4) {
+            // TODO: write new tar file
+        }
+    }
+
     public boolean flush() throws IOException {
         try {
             access.flush();
@@ -245,7 +311,6 @@ class TarFile {
         }
     }
 
-
     void close() throws IOException {
         access.close();
     }
@@ -292,6 +357,7 @@ class TarFile {
                 } else {
                     // found it!
                     return new TarEntry(
+                            msb, lsb,
                             index.getInt(position + 16),
                             index.getInt(position + 20));
                 }
@@ -410,7 +476,10 @@ class TarFile {
 
             try {
                 UUID id = UUID.fromString(name);
-                entries.put(id, new TarEntry(position + BLOCK_SIZE, size));
+                entries.put(id, new TarEntry(
+                        id.getMostSignificantBits(),
+                        id.getLeastSignificantBits(),
+                        position + BLOCK_SIZE, size));
             } catch (IllegalArgumentException e) {
                 break; // unexpected entry, truncate the file at this point
             }


Reply via email to