This is an automated email from the ASF dual-hosted git repository.
btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
The following commit(s) were added to refs/heads/master by this push:
new 64b79e1 JAMES-2287 Encode BlobId with base64 (#572)
64b79e1 is described below
commit 64b79e14f8313b944f3ea2a3f2bb5a2fd68b6042
Author: Tellier Benoit <[email protected]>
AuthorDate: Mon Aug 9 08:52:04 2021 +0700
JAMES-2287 Encode BlobId with base64 (#572)
SHA-256 generates a 32-byte hash.
Hex encodes each byte as two characters, which leads to
64-character strings.
Base64 encodes every 3 bytes as 4 characters (output is about 133%
of the input size), so the same hash takes 44 characters.
This simple change has the potential to significantly
decrease Cassandra storage space: each message stores at least 4
blobIds (messageV3 header and body, header in
imapuidtable and messageidtable, not counting attachments),
so at 20 characters saved per blobId we save 80 bytes per
message pre-compression.
Note that deduplication is affected by this change: identical
content stored before and after this change will be assigned
different blobIds, so it will be stored twice.
---
.../src/main/java/org/apache/james/blob/api/HashBlobId.java | 11 +++++++++--
.../apache/james/blob/api/DeduplicationBlobStoreContract.java | 4 ++--
.../test/java/org/apache/james/blob/api/HashBlobIdTest.java | 6 ++----
3 files changed, 13 insertions(+), 8 deletions(-)
diff --git
a/server/blob/blob-api/src/main/java/org/apache/james/blob/api/HashBlobId.java
b/server/blob/blob-api/src/main/java/org/apache/james/blob/api/HashBlobId.java
index 88a0015..e716df4 100644
---
a/server/blob/blob-api/src/main/java/org/apache/james/blob/api/HashBlobId.java
+++
b/server/blob/blob-api/src/main/java/org/apache/james/blob/api/HashBlobId.java
@@ -20,12 +20,14 @@
package org.apache.james.blob.api;
import java.io.IOException;
+import java.util.Base64;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.MoreObjects;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
+import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
import com.google.common.io.ByteSource;
@@ -35,18 +37,23 @@ public class HashBlobId implements BlobId {
@Override
public HashBlobId forPayload(byte[] payload) {
Preconditions.checkArgument(payload != null);
- return new
HashBlobId(Hashing.sha256().hashBytes(payload).toString());
+ return base64(Hashing.sha256().hashBytes(payload));
}
@Override
public BlobId forPayload(ByteSource payload) {
try {
- return new
HashBlobId(payload.hash(Hashing.sha256()).toString());
+ return base64(payload.hash(Hashing.sha256()));
} catch (IOException e) {
throw new RuntimeException(e);
}
}
+ private HashBlobId base64(HashCode hashCode) {
+ byte[] bytes = hashCode.asBytes();
+ return new HashBlobId(Base64.getEncoder().encodeToString(bytes));
+ }
+
@Override
public HashBlobId from(String id) {
Preconditions.checkArgument(!Strings.isNullOrEmpty(id));
diff --git
a/server/blob/blob-api/src/test/java/org/apache/james/blob/api/DeduplicationBlobStoreContract.java
b/server/blob/blob-api/src/test/java/org/apache/james/blob/api/DeduplicationBlobStoreContract.java
index b92e805..6ca9216 100644
---
a/server/blob/blob-api/src/test/java/org/apache/james/blob/api/DeduplicationBlobStoreContract.java
+++
b/server/blob/blob-api/src/test/java/org/apache/james/blob/api/DeduplicationBlobStoreContract.java
@@ -55,7 +55,7 @@ public interface DeduplicationBlobStoreContract {
BlobId blobId = Mono.from(store.save(defaultBucketName, SHORT_STRING,
storagePolicy)).block();
-
assertThat(blobId).isEqualTo(blobIdFactory().from("31f7a65e315586ac198bd798b6629ce4903d0899476d5741a9f32e2e521b6a66"));
+
assertThat(blobId).isEqualTo(blobIdFactory().from("MfemXjFVhqwZi9eYtmKc5JA9CJlHbVdBqfMuLlIbamY="));
}
@ParameterizedTest
@@ -66,7 +66,7 @@ public interface DeduplicationBlobStoreContract {
BlobId blobId = Mono.from(store.save(defaultBucketName,
SHORT_BYTEARRAY, storagePolicy)).block();
-
assertThat(blobId).isEqualTo(blobIdFactory().from("31f7a65e315586ac198bd798b6629ce4903d0899476d5741a9f32e2e521b6a66"));
+
assertThat(blobId).isEqualTo(blobIdFactory().from("MfemXjFVhqwZi9eYtmKc5JA9CJlHbVdBqfMuLlIbamY="));
}
@ParameterizedTest
diff --git
a/server/blob/blob-api/src/test/java/org/apache/james/blob/api/HashBlobIdTest.java
b/server/blob/blob-api/src/test/java/org/apache/james/blob/api/HashBlobIdTest.java
index 97d8eaa..5a45727 100644
---
a/server/blob/blob-api/src/test/java/org/apache/james/blob/api/HashBlobIdTest.java
+++
b/server/blob/blob-api/src/test/java/org/apache/james/blob/api/HashBlobIdTest.java
@@ -24,8 +24,6 @@ import static
org.assertj.core.api.Assertions.assertThatThrownBy;
import java.nio.charset.StandardCharsets;
-import org.apache.james.blob.api.BlobId;
-import org.apache.james.blob.api.HashBlobId;
import org.apache.james.util.ClassLoaderUtils;
import org.junit.jupiter.api.Test;
@@ -69,14 +67,14 @@ public class HashBlobIdTest {
public void forPayloadShouldHashEmptyArray() {
BlobId blobId = BLOB_ID_FACTORY.forPayload(new byte[0]);
-
assertThat(blobId.asString()).isEqualTo("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
+
assertThat(blobId.asString()).isEqualTo("47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=");
}
@Test
public void forPayloadShouldHashArray() {
BlobId blobId =
BLOB_ID_FACTORY.forPayload("content".getBytes(StandardCharsets.UTF_8));
-
assertThat(blobId.asString()).isEqualTo("ed7002b439e9ac845f22357d822bac1444730fbdb6016d3ec9432297b9ec9f73");
+
assertThat(blobId.asString()).isEqualTo("7XACtDnprIRfIjV9giusFERzD722AW0+yUMil7nsn3M=");
}
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]