Author: mduerig
Date: Fri Jul 17 10:34:06 2015
New Revision: 1691509

URL: http://svn.apache.org/r1691509
Log:
OAK-3107: SegmentWriter should be able to store blob IDs longer than 4096 bytes
Implement alternate encoding for blob IDs bigger then 4k
Credits to Francesco Mari for the patch

Modified:
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
    
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java?rev=1691509&r1=1691508&r2=1691509&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
 Fri Jul 17 10:34:06 2015
@@ -100,6 +100,15 @@ public class Segment {
      */
     public static final int MEDIUM_LIMIT = (1 << (16 - 2)) + SMALL_LIMIT;
 
+    /**
+     * Maximum size of small blob IDs. A small blob ID is stored in a value
+     * record whose length field contains the pattern "1110" in its most
+     * significant bits. Since two bytes are used to store both the bit pattern
+     * and the actual length of the blob ID, a maximum of 2^12 values can be
+     * stored in the length field.
+     */
+    public static final int BLOB_ID_SMALL_LIMIT = 1 << 12;
+
     public static final int REF_COUNT_OFFSET = 5;
 
     static final int ROOT_COUNT_OFFSET = 6;

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java?rev=1691509&r1=1691508&r2=1691509&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java
 Fri Jul 17 10:34:06 2015
@@ -79,10 +79,11 @@ public class SegmentBlob extends Record
                     segment.readRecordId(offset + 8), listSize);
             return new SegmentStream(getRecordId(), list, length);
         } else if ((head & 0xf0) == 0xe0) {
-            // 1110 xxxx: external value
-            String refererence = readReference(segment, offset, head);
-            return segment.getSegmentId().getTracker().getStore()
-                    .readBlob(refererence).getNewStream();
+            // 1110 xxxx: external value, short blob ID
+            return getNewStream(readShortBlobId(segment, offset, head));
+        } else if ((head & 0xf8) == 0xf0) {
+            // 1111 0xxx: external value, long blob ID
+            return getNewStream(readLongBlobId(segment, offset));
         } else {
             throw new IllegalStateException(String.format(
                     "Unexpected value record type: %02x", head & 0xff));
@@ -104,15 +105,11 @@ public class SegmentBlob extends Record
             // 110x xxxx: long value
             return (segment.readLong(offset) & 0x1fffffffffffffffL) + 
MEDIUM_LIMIT;
         } else if ((head & 0xf0) == 0xe0) {
-            // 1110 xxxx: external value
-            String reference = readReference(segment, offset, head);
-            long length = segment.getSegmentId().getTracker().getStore()
-                    .readBlob(reference).length();
-            if (length == -1) {
-                throw new IllegalStateException(
-                        "Unknown length of external binary: " + reference);
-            }
-            return length;
+            // 1110 xxxx: external value, short blob ID
+            return getLength(readShortBlobId(segment, offset, head));
+        } else if ((head & 0xf8) == 0xf0) {
+            // 1111 0xxx: external value, long blob ID
+            return getLength(readLongBlobId(segment, offset));
         } else {
             throw new IllegalStateException(String.format(
                     "Unexpected value record type: %02x", head & 0xff));
@@ -150,8 +147,8 @@ public class SegmentBlob extends Record
         Segment segment = getSegment();
         int offset = getOffset();
         byte head = segment.readByte(offset);
-        // 1110 xxxx: external value
-        return (head & 0xf0) == 0xe0;
+        // 1110 xxxx or 1111 0xxx: external value
+        return (head & 0xf0) == 0xe0 || (head & 0xf8) == 0xf0;
     }
 
     public String getBlobId() {
@@ -159,8 +156,11 @@ public class SegmentBlob extends Record
         int offset = getOffset();
         byte head = segment.readByte(offset);
         if ((head & 0xf0) == 0xe0) {
-            // 1110 xxxx: external value
-            return readReference(segment, offset, head);
+            // 1110 xxxx: external value, small blob ID
+            return readShortBlobId(segment, offset, head);
+        } else if ((head & 0xf8) == 0xf0) {
+            // 1111 0xxx: external value, long blob ID
+            return readLongBlobId(segment, offset);
         } else {
             return null;
         }
@@ -191,7 +191,10 @@ public class SegmentBlob extends Record
                 return writer.writeLargeBlob(length, list.getEntries());
             }
         } else if ((head & 0xf0) == 0xe0) {
-            // 1110 xxxx: external value
+            // 1110 xxxx: external value, short blob ID
+            return writer.writeExternalBlob(getBlobId());
+        } else if ((head & 0xf8) == 0xf0) {
+            // 1111 0xxx: external value, long blob ID
             return writer.writeExternalBlob(getBlobId());
         } else {
             throw new IllegalStateException(String.format(
@@ -222,14 +225,18 @@ public class SegmentBlob extends Record
 
     //-----------------------------------------------------------< private >--
 
-    private static String readReference(
-            Segment segment, int offset, byte head) {
+    private static String readShortBlobId(Segment segment, int offset, byte 
head) {
         int length = (head & 0x0f) << 8 | (segment.readByte(offset + 1) & 
0xff);
         byte[] bytes = new byte[length];
         segment.readBytes(offset + 2, bytes, 0, length);
         return new String(bytes, UTF_8);
     }
 
+    private static String readLongBlobId(Segment segment, int offset) {
+        RecordId blobIdRecordId = segment.readRecordId(offset + 1);
+        return Segment.readString(blobIdRecordId);
+    }
+
     private Iterable<SegmentId> getBulkSegmentIds() {
         Segment segment = getSegment();
         int offset = getOffset();
@@ -250,4 +257,22 @@ public class SegmentBlob extends Record
         }
     }
 
+    private Blob getBlob(String blobId) {
+        return 
getSegment().getSegmentId().getTracker().getStore().readBlob(blobId);
+    }
+
+    private InputStream getNewStream(String blobId) {
+        return getBlob(blobId).getNewStream();
+    }
+
+    private long getLength(String blobId) {
+        long length = getBlob(blobId).length();
+
+        if (length == -1) {
+            throw new IllegalStateException(String.format("Unknown length of 
external binary: %s", blobId));
+        }
+
+        return length;
+    }
+
 }

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java?rev=1691509&r1=1691508&r2=1691509&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
 Fri Jul 17 10:34:06 2015
@@ -38,6 +38,7 @@ import static org.apache.jackrabbit.oak.
 import static 
org.apache.jackrabbit.oak.plugins.segment.Segment.MAX_SEGMENT_SIZE;
 import static 
org.apache.jackrabbit.oak.plugins.segment.Segment.RECORD_ID_BYTES;
 import static 
org.apache.jackrabbit.oak.plugins.segment.Segment.SEGMENT_REFERENCE_LIMIT;
+import static org.apache.jackrabbit.oak.plugins.segment.Segment.readString;
 import static org.apache.jackrabbit.oak.plugins.segment.SegmentVersion.V_11;
 
 import java.io.ByteArrayInputStream;
@@ -586,32 +587,77 @@ public class SegmentWriter {
     }
 
     /**
-     * Write a reference to an external blob.
+     * Write a reference to an external blob. This method handles blob IDs of
+     * every length, but behaves differently for small and large blob IDs.
      *
-     * @param reference reference
-     * @return record id
+     * @param blobId Blob ID.
+     * @return Record ID pointing to the written blob ID.
+     * @see Segment#BLOB_ID_SMALL_LIMIT
      */
-    private synchronized RecordId writeValueRecord(String reference) {
-        byte[] data = reference.getBytes(Charsets.UTF_8);
-        int length = data.length;
-
-        // When writing a binary ID, the four most significant bits of the
-        // length field should be "1110", leaving 12 other bits to store the
-        // length itself. This means that the values of the length field can
-        // only range between 0 and 2^12 - 1.
-
-        checkArgument(length < 4096);
-
-        RecordId id = prepare(RecordType.VALUE, 2 + length);
-        int len = length | 0xE000;
-        buffer[position++] = (byte) (len >> 8);
-        buffer[position++] = (byte) len;
+    private RecordId writeBlobId(String blobId) {
+        byte[] data = blobId.getBytes(Charsets.UTF_8);
 
-        System.arraycopy(data, 0, buffer, position, length);
-        position += length;
+        if (data.length < Segment.BLOB_ID_SMALL_LIMIT) {
+            return writeSmallBlobId(data);
+        } else {
+            return writeLargeBlobId(blobId);
+        }
+    }
 
-        blobrefs.add(id);
-        return id;
+    /**
+     * Write a large blob ID. A blob ID is considered large if the length of 
its
+     * binary representation is equal to or greater than {@code
+     * Segment.BLOB_ID_SMALL_LIMIT}.
+     *
+     * @param blobId Blob ID.
+     * @return A record ID pointing to the written blob ID.
+     */
+    private RecordId writeLargeBlobId(String blobId) {
+        RecordId stringRecord = writeString(blobId);
+
+        synchronized (this) {
+            RecordId blobIdRecord = prepare(RecordType.VALUE, 1, 
Collections.singletonList(stringRecord));
+
+            // The length uses a fake "length" field that is always equal to 
0xF0.
+            // This allows the code to take apart small from a large blob IDs.
+
+            buffer[position++] = (byte) 0xF0;
+            writeRecordId(stringRecord);
+
+            blobrefs.add(blobIdRecord);
+
+            return blobIdRecord;
+        }
+    }
+
+    /**
+     * Write a small blob ID. A blob ID is considered small if the length of 
its
+     * binary representation is less than {@code Segment.BLOB_ID_SMALL_LIMIT}.
+     *
+     * @param blobId Blob ID.
+     * @return A record ID pointing to the written blob ID.
+     */
+    private RecordId writeSmallBlobId(byte[] blobId) {
+        int length = blobId.length;
+
+        checkArgument(length < Segment.BLOB_ID_SMALL_LIMIT);
+
+        synchronized (this) {
+            RecordId id = prepare(RecordType.VALUE, 2 + length);
+
+            int masked = length | 0xE000;
+
+            buffer[position++] = (byte) (masked >> 8);
+            buffer[position++] = (byte) (masked);
+
+            System.arraycopy(blobId, 0, buffer, position, length);
+
+            position += length;
+
+            blobrefs.add(id);
+
+            return id;
+        }
     }
 
     /**
@@ -663,7 +709,7 @@ public class SegmentWriter {
         if (base != null && base.isDiff()) {
             Segment segment = base.getSegment();
             RecordId key = segment.readRecordId(base.getOffset(8));
-            String name = segment.readString(key);
+            String name = readString(key);
             if (!changes.containsKey(name)) {
                 changes.put(name, segment.readRecordId(base.getOffset(8, 1)));
             }
@@ -780,7 +826,7 @@ public class SegmentWriter {
         if (reference != null && store.getBlobStore() != null) {
             String blobId = store.getBlobStore().getBlobId(reference);
             if (blobId != null) {
-                RecordId id = writeValueRecord(blobId);
+                RecordId id = writeBlobId(blobId);
                 return new SegmentBlob(id);
             } else {
                 log.debug("No blob found for reference {}, inlining...", 
reference);
@@ -790,8 +836,8 @@ public class SegmentWriter {
         return writeStream(blob.getNewStream());
     }
 
-    SegmentBlob writeExternalBlob(String blobId) throws IOException {
-        RecordId id = writeValueRecord(blobId);
+    SegmentBlob writeExternalBlob(String blobId) {
+        RecordId id = writeBlobId(blobId);
         return new SegmentBlob(id);
     }
 
@@ -839,7 +885,7 @@ public class SegmentWriter {
         } else if (blobStore != null) {
             String blobId = blobStore.writeBlob(new SequenceInputStream(
                     new ByteArrayInputStream(data, 0, n), stream));
-            return writeValueRecord(blobId);
+            return writeBlobId(blobId);
         }
 
         long length = n;

Modified: 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java?rev=1691509&r1=1691508&r2=1691509&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java
 Fri Jul 17 10:34:06 2015
@@ -24,8 +24,8 @@ import static org.mockito.Matchers.any;
 import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
 
+import java.io.IOException;
 import java.io.InputStream;
-import java.util.Random;
 
 import com.google.common.base.Strings;
 import org.apache.jackrabbit.oak.plugins.segment.Segment;
@@ -40,7 +40,7 @@ import org.junit.rules.TemporaryFolder;
 public class ExternalBlobReferenceTest {
 
     @Rule
-    public TemporaryFolder segmentFolder = new TemporaryFolder();
+    public final TemporaryFolder segmentFolder = new TemporaryFolder();
 
     private FileStore fileStore;
 
@@ -53,15 +53,15 @@ public class ExternalBlobReferenceTest {
     }
 
     @After
-    public void destroyFileStore() throws Exception {
+    public void destroyFileStore() {
         fileStore.close();
     }
 
     /**
      * The {@code SegmentWriter} should be able to write blob IDs whose length
-     * is between 0 and 4095 bytes. It should be possible to correctly read the
-     * blob ID back and pass it to the {@code BlobStore} to obtain information
-     * about the blob.
+     * is between 0 and {@code Segment.BLOB_ID_SMALL_LIMIT - 1} bytes. It 
should
+     * be possible to correctly read the blob ID back and pass it to the {@code
+     * BlobStore} to obtain information about the blob.
      * <p/>
      * This code path executes only if the written stream is {@code
      * Segment.MEDIUM_LIMIT} bytes long (or more). If the length of the stream
@@ -71,59 +71,85 @@ public class ExternalBlobReferenceTest {
      * See OAK-3105.
      */
     @Test
-    public void testBlobIdLengthUpperLimit() throws Exception {
-        String blobId = Strings.repeat("x", 4095);
-        long blobLength = Segment.MEDIUM_LIMIT;
-
-        doReturn(blobId).when(blobStore).writeBlob(any(InputStream.class));
-        doReturn(blobLength).when(blobStore).getBlobLength(blobId);
-
-        SegmentBlob blob = 
fileStore.getTracker().getWriter().writeStream(newRandomInputStream(blobLength));
-
-        assertEquals(blobLength, blob.length());
+    public void testShortBlobId() throws Exception {
+        testBlobIdWithLength(Segment.BLOB_ID_SMALL_LIMIT - 1);
     }
 
     /**
-     * If the {@code BlobStore} returns a blob ID whose length is 4096 byes 
long
-     * (or more), writing the stream should throw an {@code
-     * IllegalArgumentException}.
+     * If the {@code BlobStore} returns a blob ID whose length is {@code
+     * Segment.BLOB_ID_SMALL_LIMIT} bytes long (or more), writing the stream
+     * should succeed. In this case, the blob ID is considered a long blob ID
+     * and an alternate encoding is used. It should be possible to correctly
+     * read the blob ID back and pass it to the {@code BlobStore} to obtain
+     * information about the blob.
      * <p/>
      * This code path executes only if the written stream is {@code
      * Segment.MEDIUM_LIMIT} bytes long (or more). If the length of the stream
      * is smaller, the binary value is inlined in the segment and the {@code
      * BlobStore} is never called.
      * <p/>
-     * See OAK-3105.
+     * See OAK-3105 and OAK-3107.
      */
-    @Test(expected = IllegalArgumentException.class)
-    public void testBlobIdLengthLongerThanUpperLimit() throws Exception {
-        String blobId = Strings.repeat("x", 4096);
+    @Test
+    public void testLongBlobId() throws Exception {
+        testBlobIdWithLength(Segment.BLOB_ID_SMALL_LIMIT);
+    }
+
+    private void testBlobIdWithLength(int blobIdLength) throws Exception {
+        String blobId = Strings.repeat("x", blobIdLength);
         long blobLength = Segment.MEDIUM_LIMIT;
 
         doReturn(blobId).when(blobStore).writeBlob(any(InputStream.class));
+        doReturn(blobLength).when(blobStore).getBlobLength(blobId);
+
+        SegmentBlob blob = 
fileStore.getTracker().getWriter().writeStream(newRandomInputStream(blobLength));
+
+        assertEquals(blobLength, blob.length());
+    }
+
+    private static InputStream newRandomInputStream(long size) {
+        return new LimitInputStream(new ConstantInputStream(0), size);
+    }
+
+    private static class ConstantInputStream extends InputStream {
+
+        private final int value;
+
+        public ConstantInputStream(int value) {
+            this.value = value;
+        }
+
+        @Override
+        public int read() {
+            return value;
+        }
 
-        
fileStore.getTracker().getWriter().writeStream(newRandomInputStream(blobLength));
     }
 
-    private InputStream newRandomInputStream(final long size) {
-        return new InputStream() {
+    private static class LimitInputStream extends InputStream {
 
-            private Random random = new Random();
+        private final InputStream stream;
 
-            private int read = 0;
+        private final long limit;
 
-            @Override
-            public int read() {
-                if (read >= size) {
-                    return -1;
-                }
+        private long read = 0;
 
-                read += 1;
+        public LimitInputStream(InputStream stream, long limit) {
+            this.stream = stream;
+            this.limit = limit;
+        }
 
-                return random.nextInt() & 0xFF;
+        @Override
+        public int read() throws IOException {
+            if (read >= limit) {
+                return -1;
             }
 
-        };
+            read = read + 1;
+
+            return stream.read();
+        }
+
     }
 
 }


Reply via email to