This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git


The following commit(s) were added to refs/heads/master by this push:
     new 64b79e1  JAMES-2287 Encode BlobId with base64 (#572)
64b79e1 is described below

commit 64b79e14f8313b944f3ea2a3f2bb5a2fd68b6042
Author: Tellier Benoit <[email protected]>
AuthorDate: Mon Aug 9 08:52:04 2021 +0700

    JAMES-2287 Encode BlobId with base64 (#572)
    
    SHA-256 generates 32 bytes long hash.
    
    Hex encoding uses two characters per byte, so it leads to
    64-character strings.
    
    Base64 has an overhead of 33% (4 characters for every 3 bytes),
    so the same hash only takes 44 characters, padding included.
    
    This simple change has the potential to significantly
    decrease Cassandra storage space: each message stores at least
    4 blobIds (messageV3 header and body, header in
    imapuidtable and messageidtable, not counting attachments),
    thus we save 80 bytes (4 x 20 characters) per message
    pre-compression.
    
    Note that deduplication is affected by this change: identical
    content saved before and after this change will be assigned
    different blobIds and will therefore be stored twice.
---
 .../src/main/java/org/apache/james/blob/api/HashBlobId.java   | 11 +++++++++--
 .../apache/james/blob/api/DeduplicationBlobStoreContract.java |  4 ++--
 .../test/java/org/apache/james/blob/api/HashBlobIdTest.java   |  6 ++----
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git 
a/server/blob/blob-api/src/main/java/org/apache/james/blob/api/HashBlobId.java 
b/server/blob/blob-api/src/main/java/org/apache/james/blob/api/HashBlobId.java
index 88a0015..e716df4 100644
--- 
a/server/blob/blob-api/src/main/java/org/apache/james/blob/api/HashBlobId.java
+++ 
b/server/blob/blob-api/src/main/java/org/apache/james/blob/api/HashBlobId.java
@@ -20,12 +20,14 @@
 package org.apache.james.blob.api;
 
 import java.io.IOException;
+import java.util.Base64;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.MoreObjects;
 import com.google.common.base.Objects;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
+import com.google.common.hash.HashCode;
 import com.google.common.hash.Hashing;
 import com.google.common.io.ByteSource;
 
@@ -35,18 +37,23 @@ public class HashBlobId implements BlobId {
         @Override
         public HashBlobId forPayload(byte[] payload) {
             Preconditions.checkArgument(payload != null);
-            return new 
HashBlobId(Hashing.sha256().hashBytes(payload).toString());
+            return base64(Hashing.sha256().hashBytes(payload));
         }
 
         @Override
         public BlobId forPayload(ByteSource payload) {
             try {
-                return new 
HashBlobId(payload.hash(Hashing.sha256()).toString());
+                return base64(payload.hash(Hashing.sha256()));
             } catch (IOException e) {
                 throw new RuntimeException(e);
             }
         }
 
+        private HashBlobId base64(HashCode hashCode) {
+            byte[] bytes = hashCode.asBytes();
+            return new HashBlobId(Base64.getEncoder().encodeToString(bytes));
+        }
+
         @Override
         public HashBlobId from(String id) {
             Preconditions.checkArgument(!Strings.isNullOrEmpty(id));
diff --git 
a/server/blob/blob-api/src/test/java/org/apache/james/blob/api/DeduplicationBlobStoreContract.java
 
b/server/blob/blob-api/src/test/java/org/apache/james/blob/api/DeduplicationBlobStoreContract.java
index b92e805..6ca9216 100644
--- 
a/server/blob/blob-api/src/test/java/org/apache/james/blob/api/DeduplicationBlobStoreContract.java
+++ 
b/server/blob/blob-api/src/test/java/org/apache/james/blob/api/DeduplicationBlobStoreContract.java
@@ -55,7 +55,7 @@ public interface DeduplicationBlobStoreContract {
 
         BlobId blobId = Mono.from(store.save(defaultBucketName, SHORT_STRING, 
storagePolicy)).block();
 
-        
assertThat(blobId).isEqualTo(blobIdFactory().from("31f7a65e315586ac198bd798b6629ce4903d0899476d5741a9f32e2e521b6a66"));
+        
assertThat(blobId).isEqualTo(blobIdFactory().from("MfemXjFVhqwZi9eYtmKc5JA9CJlHbVdBqfMuLlIbamY="));
     }
 
     @ParameterizedTest
@@ -66,7 +66,7 @@ public interface DeduplicationBlobStoreContract {
 
         BlobId blobId = Mono.from(store.save(defaultBucketName, 
SHORT_BYTEARRAY, storagePolicy)).block();
 
-        
assertThat(blobId).isEqualTo(blobIdFactory().from("31f7a65e315586ac198bd798b6629ce4903d0899476d5741a9f32e2e521b6a66"));
+        
assertThat(blobId).isEqualTo(blobIdFactory().from("MfemXjFVhqwZi9eYtmKc5JA9CJlHbVdBqfMuLlIbamY="));
     }
 
     @ParameterizedTest
diff --git 
a/server/blob/blob-api/src/test/java/org/apache/james/blob/api/HashBlobIdTest.java
 
b/server/blob/blob-api/src/test/java/org/apache/james/blob/api/HashBlobIdTest.java
index 97d8eaa..5a45727 100644
--- 
a/server/blob/blob-api/src/test/java/org/apache/james/blob/api/HashBlobIdTest.java
+++ 
b/server/blob/blob-api/src/test/java/org/apache/james/blob/api/HashBlobIdTest.java
@@ -24,8 +24,6 @@ import static 
org.assertj.core.api.Assertions.assertThatThrownBy;
 
 import java.nio.charset.StandardCharsets;
 
-import org.apache.james.blob.api.BlobId;
-import org.apache.james.blob.api.HashBlobId;
 import org.apache.james.util.ClassLoaderUtils;
 import org.junit.jupiter.api.Test;
 
@@ -69,14 +67,14 @@ public class HashBlobIdTest {
     public void forPayloadShouldHashEmptyArray() {
         BlobId blobId = BLOB_ID_FACTORY.forPayload(new byte[0]);
 
-        
assertThat(blobId.asString()).isEqualTo("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
+        
assertThat(blobId.asString()).isEqualTo("47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=");
     }
 
     @Test
     public void forPayloadShouldHashArray() {
         BlobId blobId = 
BLOB_ID_FACTORY.forPayload("content".getBytes(StandardCharsets.UTF_8));
 
-        
assertThat(blobId.asString()).isEqualTo("ed7002b439e9ac845f22357d822bac1444730fbdb6016d3ec9432297b9ec9f73");
+        
assertThat(blobId.asString()).isEqualTo("7XACtDnprIRfIjV9giusFERzD722AW0+yUMil7nsn3M=");
     }
 
     @Test

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to