Author: jukka
Date: Thu Mar 20 04:40:27 2014
New Revision: 1579537

URL: http://svn.apache.org/r1579537
Log:
OAK-1512: Jackrabbit 2.x DataStore GC for SegmentNodeStore

Added:
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java
   (with props)
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ReferenceCollector.java
   (with props)
Modified:
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java

Added: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java?rev=1579537&view=auto
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java
 (added)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java
 Thu Mar 20 04:40:27 2014
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.segment;
+
+enum RecordType {
+    // Order is significant: SegmentWriter stores ordinal() in each root record entry.
+    LEAF,
+
+    BRANCH,
+
+    BUCKET,
+
+    LIST,
+
+    VALUE,
+
+    BLOCK,
+
+    TEMPLATE,
+
+    NODE
+
+}

Propchange: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/RecordType.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ReferenceCollector.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ReferenceCollector.java?rev=1579537&view=auto
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ReferenceCollector.java
 (added)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ReferenceCollector.java
 Thu Mar 20 04:40:27 2014
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.segment;
+
+/**
+ * Callback interface for collecting all blob references that are
+ * potentially accessible. Useful for marking referenced blobs as
+ * in use when collecting garbage in an external data store.
+ */
+public interface ReferenceCollector {
+    /** Receives one blob reference; Segment.collectBlobReferences() calls this per blob record. */
+    void addReference(String reference);
+
+}

Propchange: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/ReferenceCollector.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java?rev=1579537&r1=1579536&r2=1579537&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
 Thu Mar 20 04:40:27 2014
@@ -96,6 +96,8 @@ public class Segment {
 
     static int ROOT_COUNT_OFFSET = 6;
 
+    static int BLOBREF_COUNT_OFFSET = 8;
+
     private final SegmentTracker tracker;
 
     private final SegmentId id;
@@ -221,6 +223,21 @@ public class Segment {
         }
     }
 
+    /** Passes the reference of every blob record listed in this segment's header to the collector. */
+    void collectBlobReferences(ReferenceCollector collector) {
+        int base = data.position();
+        int refcount = getRefCount();
+        int rootcount = data.getShort(base + ROOT_COUNT_OFFSET) & 0xffff;
+        int blobrefcount = data.getShort(base + BLOBREF_COUNT_OFFSET) & 0xffff;
+        // The 2-byte blob entries follow refcount * 16 bytes and the 3-byte root entries.
+        int blobrefpos = base + refcount * 16 + rootcount * 3;
+        for (int i = 0; i < blobrefcount; i++) {
+            // SegmentWriter stores (offset >> RECORD_ALIGN_BITS); undo it with the same constant.
+            int offset = (data.getShort(blobrefpos + i * 2) & 0xffff) << RECORD_ALIGN_BITS;
+            collector.addReference(new SegmentBlob(new RecordId(id, offset)).getReference());
+        }
+    }
+
     byte readByte(int offset) {
         return data.get(pos(offset, 1));
     }

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java?rev=1579537&r1=1579536&r2=1579537&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentTracker.java
 Thu Mar 20 04:40:27 2014
@@ -17,10 +17,13 @@
 package org.apache.jackrabbit.oak.plugins.segment;
 
 import static com.google.common.collect.Lists.newLinkedList;
+import static com.google.common.collect.Queues.newArrayDeque;
 import static com.google.common.collect.Sets.newHashSet;
+import static com.google.common.collect.Sets.newIdentityHashSet;
 
 import java.security.SecureRandom;
 import java.util.LinkedList;
+import java.util.Queue;
 import java.util.Set;
 
 /**
@@ -131,6 +134,37 @@ public class SegmentTracker {
     }
 
     /**
+     * Finds all external blob references that are currently accessible
+     * in this repository and adds them to the given collector. Useful
+     * for collecting garbage in an external data store.
+     * <p>
+     * Note that this method only collects blob references that are already
+     * stored in the repository (at the time when this method is called), so
+     * the garbage collector will need some other mechanism for tracking
+     * in-memory references and references stored while this method is
+     * running.
+     */
+    public void collectBlobReferences(ReferenceCollector collector) {
+        Set<SegmentId> processed = newIdentityHashSet();
+        Queue<SegmentId> queue = newArrayDeque(getReferencedSegmentIds());
+        writer.flush(); // force the current segment to have root record info
+        while (!queue.isEmpty()) {
+            SegmentId id = queue.remove();
+            // add() is true only on the first visit, so each data segment is scanned once
+            if (id.isDataSegmentId() && processed.add(id)) {
+                Segment segment = id.getSegment();
+                segment.collectBlobReferences(collector);
+                // continue the traversal with data segments not yet scanned
+                for (SegmentId refid : segment.getReferencedIds()) {
+                    if (refid.isDataSegmentId() && !processed.contains(refid)) {
+                        queue.add(refid);
+                    }
+                }
+            }
+        }
+    }
+
+    /**
      * 
      * @param msb
      * @param lsb

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java?rev=1579537&r1=1579536&r2=1579537&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
 Thu Mar 20 04:40:27 2014
@@ -36,6 +36,7 @@ import static org.apache.jackrabbit.oak.
 import static org.apache.jackrabbit.oak.api.Type.NAMES;
 import static 
org.apache.jackrabbit.oak.plugins.segment.MapRecord.BUCKETS_PER_LEVEL;
 import static 
org.apache.jackrabbit.oak.plugins.segment.Segment.MAX_SEGMENT_SIZE;
+import static 
org.apache.jackrabbit.oak.plugins.segment.Segment.RECORD_ID_BYTES;
 import static org.apache.jackrabbit.oak.plugins.segment.Segment.align;
 
 import java.io.ByteArrayInputStream;
@@ -71,17 +72,6 @@ import com.google.common.io.Closeables;
 
 public class SegmentWriter {
 
-    private enum RecordType {
-        LEAF,
-        BRANCH,
-        BUCKET,
-        LIST,
-        VALUE,
-        BLOCK,
-        TEMPLATE,
-        NODE
-    }
-
     static final int BLOCK_SIZE = 1 << 12; // 4kB
 
     private static byte[] createNewBuffer() {
@@ -119,6 +109,11 @@ public class SegmentWriter {
     private final Map<RecordId, RecordType> roots = newLinkedHashMap();
 
     /**
+     * Identifiers of the external blob references stored in this segment.
+     */
+    private final List<RecordId> blobrefs = newArrayList();
+
+    /**
      * The segment write buffer, filled from the end to the beginning
      * (see OAK-629).
      */
@@ -162,21 +157,29 @@ public class SegmentWriter {
             buffer[Segment.ROOT_COUNT_OFFSET] = (byte) (rootcount >> 8);
             buffer[Segment.ROOT_COUNT_OFFSET + 1] = (byte) rootcount;
 
-            int rootpos = refcount * 16;
-            if (length + rootpos > buffer.length) {
+            int blobrefcount = blobrefs.size();
+            buffer[Segment.BLOBREF_COUNT_OFFSET] = (byte) (blobrefcount >> 8);
+            buffer[Segment.BLOBREF_COUNT_OFFSET + 1] = (byte) blobrefcount;
+
+            int pos = refcount * 16;
+            if (length + pos > buffer.length) {
                 length = buffer.length;
             } else {
-                System.arraycopy(
-                        buffer, 0,
-                        buffer, buffer.length-length, rootpos);
-                rootpos += buffer.length - length;
+                System.arraycopy(buffer, 0, buffer, buffer.length-length, pos);
+                pos += buffer.length - length;
             }
 
             for (Map.Entry<RecordId, RecordType> entry : roots.entrySet()) {
                 int offset = entry.getKey().getOffset();
-                buffer[rootpos++] = (byte) entry.getValue().ordinal();
-                buffer[rootpos++] = (byte) (offset >> (8 + 
Segment.RECORD_ALIGN_BITS));
-                buffer[rootpos++] = (byte) (offset >> 
Segment.RECORD_ALIGN_BITS);
+                buffer[pos++] = (byte) entry.getValue().ordinal();
+                buffer[pos++] = (byte) (offset >> (8 + 
Segment.RECORD_ALIGN_BITS));
+                buffer[pos++] = (byte) (offset >> Segment.RECORD_ALIGN_BITS);
+            }
+
+            for (RecordId blobref : blobrefs) {
+                int offset = blobref.getOffset();
+                buffer[pos++] = (byte) (offset >> (8 + 
Segment.RECORD_ALIGN_BITS));
+                buffer[pos++] = (byte) (offset >> Segment.RECORD_ALIGN_BITS);
             }
 
             store.writeSegment(
@@ -186,6 +189,7 @@ public class SegmentWriter {
 
             buffer = createNewBuffer();
             roots.clear();
+            blobrefs.clear();
             length = 0;
             position = buffer.length;
             segment = new Segment(tracker, buffer);
@@ -202,6 +206,7 @@ public class SegmentWriter {
         checkArgument(size >= 0);
         checkNotNull(ids);
 
+        int blobrefcount = blobrefs.size() + 1;
         int rootcount = roots.size() + 1;
         int refcount = segment.getRefCount();
         Set<SegmentId> segmentIds = newIdentityHashSet();
@@ -220,10 +225,12 @@ public class SegmentWriter {
             refcount += segmentIds.size();
         }
 
-        int recordSize = Segment.align(size + ids.size() * 
Segment.RECORD_ID_BYTES);
-        int headerSize = Segment.align(refcount * 16 + rootcount * 3);
+        int recordSize = Segment.align(size + ids.size() * RECORD_ID_BYTES);
+        int headerSize = Segment.align(
+                refcount * 16 + rootcount * 3 + blobrefcount * 2);
         int segmentSize = headerSize + recordSize + length;
         if (segmentSize > buffer.length - 1
+                || blobrefcount > 0xffff
                 || rootcount > 0xffff
                 || refcount > Segment.SEGMENT_REFERENCE_LIMIT) {
             flush();
@@ -496,6 +503,8 @@ public class SegmentWriter {
 
         System.arraycopy(data, 0, buffer, position, length);
         position += length;
+
+        blobrefs.add(id);
         return id;
     }
 


Reply via email to