stefan-egli commented on code in PR #1473:
URL: https://github.com/apache/jackrabbit-oak/pull/1473#discussion_r1608383806


##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyState.java:
##########
@@ -38,24 +43,52 @@
 import org.apache.jackrabbit.oak.plugins.memory.StringPropertyState;
 import org.apache.jackrabbit.oak.plugins.value.Conversions;
 import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * PropertyState implementation with lazy parsing of the JSOP encoded value.
  */
 final class DocumentPropertyState implements PropertyState {
 
+    private static final Logger LOG = 
LoggerFactory.getLogger(DocumentPropertyState.class);
+
     private final DocumentNodeStore store;
 
     private final String name;
 
     private final String value;
 
     private PropertyState parsed;
+    private byte[] compressedValue;
+    private final Compression compression = Compression.GZIP;
+
+    private static final int DEFAULT_COMPRESSION_THRESHOLD = 
Integer.getInteger("oak.mongo.compressionThreshold", 1024);
 
     DocumentPropertyState(DocumentNodeStore store, String name, String value) {
         this.store = store;
         this.name = name;
-        this.value = value;
+
+        int size = value.getBytes().length;
+        if (size > DEFAULT_COMPRESSION_THRESHOLD) {
+            compressedValue =  compress(value.getBytes());
+            this.value = null;
+        } else {
+            this.value = value;
+        }
+    }
+
+    private byte[] compress(byte[] value) {
+        try {
+            ByteArrayOutputStream out = new ByteArrayOutputStream();
+            OutputStream compressionOutputStream = 
compression.getOutputStream(out);
+            compressionOutputStream.write(value);
+            compressionOutputStream.close();
+            return out.toByteArray();
+        } catch (IOException e) {
+            LOG.warn("Failed to compress data: ", value);

Review Comment:
   I wouldn't log the value as that could leak sensitive data into the log 
file. While we don't have the path, perhaps just log the property name plus the 
IOException message?



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyState.java:
##########
@@ -38,24 +43,52 @@
 import org.apache.jackrabbit.oak.plugins.memory.StringPropertyState;
 import org.apache.jackrabbit.oak.plugins.value.Conversions;
 import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * PropertyState implementation with lazy parsing of the JSOP encoded value.
  */
 final class DocumentPropertyState implements PropertyState {
 
+    private static final Logger LOG = 
LoggerFactory.getLogger(DocumentPropertyState.class);
+
     private final DocumentNodeStore store;
 
     private final String name;
 
     private final String value;
 
     private PropertyState parsed;
+    private byte[] compressedValue;

Review Comment:
   could/should this be `final` as well? (just one more `null` assignment 
necessary..)



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyState.java:
##########
@@ -38,24 +43,52 @@
 import org.apache.jackrabbit.oak.plugins.memory.StringPropertyState;
 import org.apache.jackrabbit.oak.plugins.value.Conversions;
 import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * PropertyState implementation with lazy parsing of the JSOP encoded value.
  */
 final class DocumentPropertyState implements PropertyState {
 
+    private static final Logger LOG = 
LoggerFactory.getLogger(DocumentPropertyState.class);
+
     private final DocumentNodeStore store;
 
     private final String name;
 
     private final String value;
 
     private PropertyState parsed;
+    private byte[] compressedValue;
+    private final Compression compression = Compression.GZIP;
+
+    private static final int DEFAULT_COMPRESSION_THRESHOLD = 
Integer.getInteger("oak.mongo.compressionThreshold", 1024);
 
     DocumentPropertyState(DocumentNodeStore store, String name, String value) {
         this.store = store;
         this.name = name;
-        this.value = value;
+
+        int size = value.getBytes().length;
+        if (size > DEFAULT_COMPRESSION_THRESHOLD) {
+            compressedValue =  compress(value.getBytes());
+            this.value = null;
+        } else {
+            this.value = value;
+        }
+    }
+
+    private byte[] compress(byte[] value) {
+        try {
+            ByteArrayOutputStream out = new ByteArrayOutputStream();
+            OutputStream compressionOutputStream = 
compression.getOutputStream(out);
+            compressionOutputStream.write(value);
+            compressionOutputStream.close();
+            return out.toByteArray();
+        } catch (IOException e) {
+            LOG.warn("Failed to compress data: ", value);
+            return value;

Review Comment:
   with now returning the original uncompressed `value` here in exception case, 
that then likely leads to subsequent issues with uncompressing. It might need 
more complex logic in exception cases I think.



##########
oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyStateTest.java:
##########
@@ -38,6 +39,19 @@
 public class DocumentPropertyStateTest {
 
     private static final int BLOB_SIZE = 16 * 1024;
+    public static final String TEST = "test";

Review Comment:
   naming .. but maybe it could indicate what it is used for, eg `TEST_NODE` ?



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyState.java:
##########
@@ -116,7 +149,16 @@ public int count() {
      */
     @NotNull
     String getValue() {
-        return value;
+        return value != null ? value : decompress(this.compressedValue);
+    }
+
+    private String decompress(byte[] value) {
+        try {
+            return new String(compression.getInputStream(new 
ByteArrayInputStream(value)).readAllBytes());
+        } catch (IOException e) {
+            LOG.warn("Failed to decompress data.", value);

Review Comment:
   an IOException on decompress is bad, as that would fail the reading of the 
value (while the same on compression is not a problem, then it would just not 
compress). So I'd argue this would justify a `LOG.error` here.
   
   problem might then be, that in IOException case this might become a noisy 
logger .. not sure if we should already consider that now though...
   
   but also : I wouldn't log the value itself.



##########
oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyStateTest.java:
##########
@@ -68,10 +82,10 @@ public void multiValuedBinarySize() throws Exception {
         for (int i = 0; i < 3; i++) {
             blobs.add(builder.createBlob(new RandomStream(BLOB_SIZE, i)));
         }
-        builder.child("test").setProperty("p", blobs, Type.BINARIES);
+        builder.child(TEST).setProperty("p", blobs, Type.BINARIES);
         TestUtils.merge(ns, builder);
 
-        PropertyState p = ns.getRoot().getChildNode("test").getProperty("p");
+        PropertyState p = ns.getRoot().getChildNode(TEST).getProperty("p");

Review Comment:
   minor: could also leave this method unchanged as it is unrelated and only 
cosmetic



##########
oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyState.java:
##########
@@ -38,24 +43,52 @@
 import org.apache.jackrabbit.oak.plugins.memory.StringPropertyState;
 import org.apache.jackrabbit.oak.plugins.value.Conversions;
 import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * PropertyState implementation with lazy parsing of the JSOP encoded value.
  */
 final class DocumentPropertyState implements PropertyState {
 
+    private static final Logger LOG = 
LoggerFactory.getLogger(DocumentPropertyState.class);
+
     private final DocumentNodeStore store;
 
     private final String name;
 
     private final String value;
 
     private PropertyState parsed;
+    private byte[] compressedValue;
+    private final Compression compression = Compression.GZIP;
+
+    private static final int DEFAULT_COMPRESSION_THRESHOLD = 
Integer.getInteger("oak.mongo.compressionThreshold", 1024);
 
     DocumentPropertyState(DocumentNodeStore store, String name, String value) {
         this.store = store;
         this.name = name;
-        this.value = value;
+
+        int size = value.getBytes().length;
+        if (size > DEFAULT_COMPRESSION_THRESHOLD) {
+            compressedValue =  compress(value.getBytes());

Review Comment:
   ```suggestion
               compressedValue = compress(value.getBytes());
   ```
   formatting



##########
oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyStateTest.java:
##########
@@ -38,6 +39,19 @@
 public class DocumentPropertyStateTest {
 
     private static final int BLOB_SIZE = 16 * 1024;
+    public static final String TEST = "test";
+    public static final String STRING_HUGEVALUE = 
"dummyalgjalegaafdajflalsdddkajf;kdfjakdfjadlsfjalkdsfjakldsfjkladsfjalkdsfjadlk;"
 +

Review Comment:
   would org.apache.commons.lang3.RandomStringUtils (maybe using the variant 
that accepts a `Random` that can be seeded with a fixed value) be a useful 
alternative for this hard coded string?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@jackrabbit.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to