Author: mduerig
Date: Fri Jul 17 10:34:06 2015
New Revision: 1691509
URL: http://svn.apache.org/r1691509
Log:
OAK-3107: SegmentWriter should be able to store blob IDs longer than 4096 bytes
Implement alternate encoding for blob IDs bigger then 4k
Credits to Francesco Mari for the patch
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java?rev=1691509&r1=1691508&r2=1691509&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/Segment.java
Fri Jul 17 10:34:06 2015
@@ -100,6 +100,15 @@ public class Segment {
*/
public static final int MEDIUM_LIMIT = (1 << (16 - 2)) + SMALL_LIMIT;
+ /**
+ * Maximum size of small blob IDs. A small blob ID is stored in a value
+ * record whose length field contains the pattern "1110" in its most
+ * significant bits. Since two bytes are used to store both the bit pattern
+ * and the actual length of the blob ID, a maximum of 2^12 values can be
+ * stored in the length field.
+ */
+ public static final int BLOB_ID_SMALL_LIMIT = 1 << 12;
+
public static final int REF_COUNT_OFFSET = 5;
static final int ROOT_COUNT_OFFSET = 6;
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java?rev=1691509&r1=1691508&r2=1691509&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentBlob.java
Fri Jul 17 10:34:06 2015
@@ -79,10 +79,11 @@ public class SegmentBlob extends Record
segment.readRecordId(offset + 8), listSize);
return new SegmentStream(getRecordId(), list, length);
} else if ((head & 0xf0) == 0xe0) {
- // 1110 xxxx: external value
- String refererence = readReference(segment, offset, head);
- return segment.getSegmentId().getTracker().getStore()
- .readBlob(refererence).getNewStream();
+ // 1110 xxxx: external value, short blob ID
+ return getNewStream(readShortBlobId(segment, offset, head));
+ } else if ((head & 0xf8) == 0xf0) {
+ // 1111 0xxx: external value, long blob ID
+ return getNewStream(readLongBlobId(segment, offset));
} else {
throw new IllegalStateException(String.format(
"Unexpected value record type: %02x", head & 0xff));
@@ -104,15 +105,11 @@ public class SegmentBlob extends Record
// 110x xxxx: long value
return (segment.readLong(offset) & 0x1fffffffffffffffL) +
MEDIUM_LIMIT;
} else if ((head & 0xf0) == 0xe0) {
- // 1110 xxxx: external value
- String reference = readReference(segment, offset, head);
- long length = segment.getSegmentId().getTracker().getStore()
- .readBlob(reference).length();
- if (length == -1) {
- throw new IllegalStateException(
- "Unknown length of external binary: " + reference);
- }
- return length;
+ // 1110 xxxx: external value, short blob ID
+ return getLength(readShortBlobId(segment, offset, head));
+ } else if ((head & 0xf8) == 0xf0) {
+ // 1111 0xxx: external value, long blob ID
+ return getLength(readLongBlobId(segment, offset));
} else {
throw new IllegalStateException(String.format(
"Unexpected value record type: %02x", head & 0xff));
@@ -150,8 +147,8 @@ public class SegmentBlob extends Record
Segment segment = getSegment();
int offset = getOffset();
byte head = segment.readByte(offset);
- // 1110 xxxx: external value
- return (head & 0xf0) == 0xe0;
+ // 1110 xxxx or 1111 0xxx: external value
+ return (head & 0xf0) == 0xe0 || (head & 0xf8) == 0xf0;
}
public String getBlobId() {
@@ -159,8 +156,11 @@ public class SegmentBlob extends Record
int offset = getOffset();
byte head = segment.readByte(offset);
if ((head & 0xf0) == 0xe0) {
- // 1110 xxxx: external value
- return readReference(segment, offset, head);
+ // 1110 xxxx: external value, small blob ID
+ return readShortBlobId(segment, offset, head);
+ } else if ((head & 0xf8) == 0xf0) {
+ // 1111 0xxx: external value, long blob ID
+ return readLongBlobId(segment, offset);
} else {
return null;
}
@@ -191,7 +191,10 @@ public class SegmentBlob extends Record
return writer.writeLargeBlob(length, list.getEntries());
}
} else if ((head & 0xf0) == 0xe0) {
- // 1110 xxxx: external value
+ // 1110 xxxx: external value, short blob ID
+ return writer.writeExternalBlob(getBlobId());
+ } else if ((head & 0xf8) == 0xf0) {
+ // 1111 0xxx: external value, long blob ID
return writer.writeExternalBlob(getBlobId());
} else {
throw new IllegalStateException(String.format(
@@ -222,14 +225,18 @@ public class SegmentBlob extends Record
//-----------------------------------------------------------< private >--
- private static String readReference(
- Segment segment, int offset, byte head) {
+ private static String readShortBlobId(Segment segment, int offset, byte
head) {
int length = (head & 0x0f) << 8 | (segment.readByte(offset + 1) &
0xff);
byte[] bytes = new byte[length];
segment.readBytes(offset + 2, bytes, 0, length);
return new String(bytes, UTF_8);
}
+ private static String readLongBlobId(Segment segment, int offset) {
+ RecordId blobIdRecordId = segment.readRecordId(offset + 1);
+ return Segment.readString(blobIdRecordId);
+ }
+
private Iterable<SegmentId> getBulkSegmentIds() {
Segment segment = getSegment();
int offset = getOffset();
@@ -250,4 +257,22 @@ public class SegmentBlob extends Record
}
}
+ private Blob getBlob(String blobId) {
+ return
getSegment().getSegmentId().getTracker().getStore().readBlob(blobId);
+ }
+
+ private InputStream getNewStream(String blobId) {
+ return getBlob(blobId).getNewStream();
+ }
+
+ private long getLength(String blobId) {
+ long length = getBlob(blobId).length();
+
+ if (length == -1) {
+ throw new IllegalStateException(String.format("Unknown length of
external binary: %s", blobId));
+ }
+
+ return length;
+ }
+
}
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java?rev=1691509&r1=1691508&r2=1691509&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/segment/SegmentWriter.java
Fri Jul 17 10:34:06 2015
@@ -38,6 +38,7 @@ import static org.apache.jackrabbit.oak.
import static
org.apache.jackrabbit.oak.plugins.segment.Segment.MAX_SEGMENT_SIZE;
import static
org.apache.jackrabbit.oak.plugins.segment.Segment.RECORD_ID_BYTES;
import static
org.apache.jackrabbit.oak.plugins.segment.Segment.SEGMENT_REFERENCE_LIMIT;
+import static org.apache.jackrabbit.oak.plugins.segment.Segment.readString;
import static org.apache.jackrabbit.oak.plugins.segment.SegmentVersion.V_11;
import java.io.ByteArrayInputStream;
@@ -586,32 +587,77 @@ public class SegmentWriter {
}
/**
- * Write a reference to an external blob.
+ * Write a reference to an external blob. This method handles blob IDs of
+ * every length, but behaves differently for small and large blob IDs.
*
- * @param reference reference
- * @return record id
+ * @param blobId Blob ID.
+ * @return Record ID pointing to the written blob ID.
+ * @see Segment#BLOB_ID_SMALL_LIMIT
*/
- private synchronized RecordId writeValueRecord(String reference) {
- byte[] data = reference.getBytes(Charsets.UTF_8);
- int length = data.length;
-
- // When writing a binary ID, the four most significant bits of the
- // length field should be "1110", leaving 12 other bits to store the
- // length itself. This means that the values of the length field can
- // only range between 0 and 2^12 - 1.
-
- checkArgument(length < 4096);
-
- RecordId id = prepare(RecordType.VALUE, 2 + length);
- int len = length | 0xE000;
- buffer[position++] = (byte) (len >> 8);
- buffer[position++] = (byte) len;
+ private RecordId writeBlobId(String blobId) {
+ byte[] data = blobId.getBytes(Charsets.UTF_8);
- System.arraycopy(data, 0, buffer, position, length);
- position += length;
+ if (data.length < Segment.BLOB_ID_SMALL_LIMIT) {
+ return writeSmallBlobId(data);
+ } else {
+ return writeLargeBlobId(blobId);
+ }
+ }
- blobrefs.add(id);
- return id;
+ /**
+ * Write a large blob ID. A blob ID is considered large if the length of
its
+ * binary representation is equal to or greater than {@code
+ * Segment.BLOB_ID_SMALL_LIMIT}.
+ *
+ * @param blobId Blob ID.
+ * @return A record ID pointing to the written blob ID.
+ */
+ private RecordId writeLargeBlobId(String blobId) {
+ RecordId stringRecord = writeString(blobId);
+
+ synchronized (this) {
+ RecordId blobIdRecord = prepare(RecordType.VALUE, 1,
Collections.singletonList(stringRecord));
+
+ // The length uses a fake "length" field that is always equal to
0xF0.
+ // This allows the code to take apart small from a large blob IDs.
+
+ buffer[position++] = (byte) 0xF0;
+ writeRecordId(stringRecord);
+
+ blobrefs.add(blobIdRecord);
+
+ return blobIdRecord;
+ }
+ }
+
+ /**
+ * Write a small blob ID. A blob ID is considered small if the length of
its
+ * binary representation is less than {@code Segment.BLOB_ID_SMALL_LIMIT}.
+ *
+ * @param blobId Blob ID.
+ * @return A record ID pointing to the written blob ID.
+ */
+ private RecordId writeSmallBlobId(byte[] blobId) {
+ int length = blobId.length;
+
+ checkArgument(length < Segment.BLOB_ID_SMALL_LIMIT);
+
+ synchronized (this) {
+ RecordId id = prepare(RecordType.VALUE, 2 + length);
+
+ int masked = length | 0xE000;
+
+ buffer[position++] = (byte) (masked >> 8);
+ buffer[position++] = (byte) (masked);
+
+ System.arraycopy(blobId, 0, buffer, position, length);
+
+ position += length;
+
+ blobrefs.add(id);
+
+ return id;
+ }
}
/**
@@ -663,7 +709,7 @@ public class SegmentWriter {
if (base != null && base.isDiff()) {
Segment segment = base.getSegment();
RecordId key = segment.readRecordId(base.getOffset(8));
- String name = segment.readString(key);
+ String name = readString(key);
if (!changes.containsKey(name)) {
changes.put(name, segment.readRecordId(base.getOffset(8, 1)));
}
@@ -780,7 +826,7 @@ public class SegmentWriter {
if (reference != null && store.getBlobStore() != null) {
String blobId = store.getBlobStore().getBlobId(reference);
if (blobId != null) {
- RecordId id = writeValueRecord(blobId);
+ RecordId id = writeBlobId(blobId);
return new SegmentBlob(id);
} else {
log.debug("No blob found for reference {}, inlining...",
reference);
@@ -790,8 +836,8 @@ public class SegmentWriter {
return writeStream(blob.getNewStream());
}
- SegmentBlob writeExternalBlob(String blobId) throws IOException {
- RecordId id = writeValueRecord(blobId);
+ SegmentBlob writeExternalBlob(String blobId) {
+ RecordId id = writeBlobId(blobId);
return new SegmentBlob(id);
}
@@ -839,7 +885,7 @@ public class SegmentWriter {
} else if (blobStore != null) {
String blobId = blobStore.writeBlob(new SequenceInputStream(
new ByteArrayInputStream(data, 0, n), stream));
- return writeValueRecord(blobId);
+ return writeBlobId(blobId);
}
long length = n;
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java?rev=1691509&r1=1691508&r2=1691509&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/segment/file/ExternalBlobReferenceTest.java
Fri Jul 17 10:34:06 2015
@@ -24,8 +24,8 @@ import static org.mockito.Matchers.any;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
+import java.io.IOException;
import java.io.InputStream;
-import java.util.Random;
import com.google.common.base.Strings;
import org.apache.jackrabbit.oak.plugins.segment.Segment;
@@ -40,7 +40,7 @@ import org.junit.rules.TemporaryFolder;
public class ExternalBlobReferenceTest {
@Rule
- public TemporaryFolder segmentFolder = new TemporaryFolder();
+ public final TemporaryFolder segmentFolder = new TemporaryFolder();
private FileStore fileStore;
@@ -53,15 +53,15 @@ public class ExternalBlobReferenceTest {
}
@After
- public void destroyFileStore() throws Exception {
+ public void destroyFileStore() {
fileStore.close();
}
/**
* The {@code SegmentWriter} should be able to write blob IDs whose length
- * is between 0 and 4095 bytes. It should be possible to correctly read the
- * blob ID back and pass it to the {@code BlobStore} to obtain information
- * about the blob.
+ * is between 0 and {@code Segment.BLOB_ID_SMALL_LIMIT - 1} bytes. It
should
+ * be possible to correctly read the blob ID back and pass it to the {@code
+ * BlobStore} to obtain information about the blob.
* <p/>
* This code path executes only if the written stream is {@code
* Segment.MEDIUM_LIMIT} bytes long (or more). If the length of the stream
@@ -71,59 +71,85 @@ public class ExternalBlobReferenceTest {
* See OAK-3105.
*/
@Test
- public void testBlobIdLengthUpperLimit() throws Exception {
- String blobId = Strings.repeat("x", 4095);
- long blobLength = Segment.MEDIUM_LIMIT;
-
- doReturn(blobId).when(blobStore).writeBlob(any(InputStream.class));
- doReturn(blobLength).when(blobStore).getBlobLength(blobId);
-
- SegmentBlob blob =
fileStore.getTracker().getWriter().writeStream(newRandomInputStream(blobLength));
-
- assertEquals(blobLength, blob.length());
+ public void testShortBlobId() throws Exception {
+ testBlobIdWithLength(Segment.BLOB_ID_SMALL_LIMIT - 1);
}
/**
- * If the {@code BlobStore} returns a blob ID whose length is 4096 byes
long
- * (or more), writing the stream should throw an {@code
- * IllegalArgumentException}.
+ * If the {@code BlobStore} returns a blob ID whose length is {@code
+ * Segment.BLOB_ID_SMALL_LIMIT} bytes long (or more), writing the stream
+ * should succeed. In this case, the blob ID is considered a long blob ID
+ * and an alternate encoding is used. It should be possible to correctly
+ * read the blob ID back and pass it to the {@code BlobStore} to obtain
+ * information about the blob.
* <p/>
* This code path executes only if the written stream is {@code
* Segment.MEDIUM_LIMIT} bytes long (or more). If the length of the stream
* is smaller, the binary value is inlined in the segment and the {@code
* BlobStore} is never called.
* <p/>
- * See OAK-3105.
+ * See OAK-3105 and OAK-3107.
*/
- @Test(expected = IllegalArgumentException.class)
- public void testBlobIdLengthLongerThanUpperLimit() throws Exception {
- String blobId = Strings.repeat("x", 4096);
+ @Test
+ public void testLongBlobId() throws Exception {
+ testBlobIdWithLength(Segment.BLOB_ID_SMALL_LIMIT);
+ }
+
+ private void testBlobIdWithLength(int blobIdLength) throws Exception {
+ String blobId = Strings.repeat("x", blobIdLength);
long blobLength = Segment.MEDIUM_LIMIT;
doReturn(blobId).when(blobStore).writeBlob(any(InputStream.class));
+ doReturn(blobLength).when(blobStore).getBlobLength(blobId);
+
+ SegmentBlob blob =
fileStore.getTracker().getWriter().writeStream(newRandomInputStream(blobLength));
+
+ assertEquals(blobLength, blob.length());
+ }
+
+ private static InputStream newRandomInputStream(long size) {
+ return new LimitInputStream(new ConstantInputStream(0), size);
+ }
+
+ private static class ConstantInputStream extends InputStream {
+
+ private final int value;
+
+ public ConstantInputStream(int value) {
+ this.value = value;
+ }
+
+ @Override
+ public int read() {
+ return value;
+ }
-
fileStore.getTracker().getWriter().writeStream(newRandomInputStream(blobLength));
}
- private InputStream newRandomInputStream(final long size) {
- return new InputStream() {
+ private static class LimitInputStream extends InputStream {
- private Random random = new Random();
+ private final InputStream stream;
- private int read = 0;
+ private final long limit;
- @Override
- public int read() {
- if (read >= size) {
- return -1;
- }
+ private long read = 0;
- read += 1;
+ public LimitInputStream(InputStream stream, long limit) {
+ this.stream = stream;
+ this.limit = limit;
+ }
- return random.nextInt() & 0xFF;
+ @Override
+ public int read() throws IOException {
+ if (read >= limit) {
+ return -1;
}
- };
+ read = read + 1;
+
+ return stream.read();
+ }
+
}
}