stefan-egli commented on code in PR #1473: URL: https://github.com/apache/jackrabbit-oak/pull/1473#discussion_r1608383806
########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyState.java: ########## @@ -38,24 +43,52 @@ import org.apache.jackrabbit.oak.plugins.memory.StringPropertyState; import org.apache.jackrabbit.oak.plugins.value.Conversions; import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * PropertyState implementation with lazy parsing of the JSOP encoded value. */ final class DocumentPropertyState implements PropertyState { + private static final Logger LOG = LoggerFactory.getLogger(DocumentPropertyState.class); + private final DocumentNodeStore store; private final String name; private final String value; private PropertyState parsed; + private byte[] compressedValue; + private final Compression compression = Compression.GZIP; + + private static final int DEFAULT_COMPRESSION_THRESHOLD = Integer.getInteger("oak.mongo.compressionThreshold", 1024); DocumentPropertyState(DocumentNodeStore store, String name, String value) { this.store = store; this.name = name; - this.value = value; + + int size = value.getBytes().length; + if (size > DEFAULT_COMPRESSION_THRESHOLD) { + compressedValue = compress(value.getBytes()); + this.value = null; + } else { + this.value = value; + } + } + + private byte[] compress(byte[] value) { + try { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + OutputStream compressionOutputStream = compression.getOutputStream(out); + compressionOutputStream.write(value); + compressionOutputStream.close(); + return out.toByteArray(); + } catch (IOException e) { + LOG.warn("Failed to compress data: ", value); Review Comment: I wouldn't log the value as that could leak sensitive data into the log file. While we don't have the path, perhaps just log the property name plus the IOException message? ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyState.java: ########## @@ -38,24 +43,52 @@ import org.apache.jackrabbit.oak.plugins.memory.StringPropertyState; import org.apache.jackrabbit.oak.plugins.value.Conversions; import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * PropertyState implementation with lazy parsing of the JSOP encoded value. */ final class DocumentPropertyState implements PropertyState { + private static final Logger LOG = LoggerFactory.getLogger(DocumentPropertyState.class); + private final DocumentNodeStore store; private final String name; private final String value; private PropertyState parsed; + private byte[] compressedValue; Review Comment: could/should this be `final` as well? (just one more `null` assignment necessary..) ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyState.java: ########## @@ -38,24 +43,52 @@ import org.apache.jackrabbit.oak.plugins.memory.StringPropertyState; import org.apache.jackrabbit.oak.plugins.value.Conversions; import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * PropertyState implementation with lazy parsing of the JSOP encoded value. */ final class DocumentPropertyState implements PropertyState { + private static final Logger LOG = LoggerFactory.getLogger(DocumentPropertyState.class); + private final DocumentNodeStore store; private final String name; private final String value; private PropertyState parsed; + private byte[] compressedValue; + private final Compression compression = Compression.GZIP; + + private static final int DEFAULT_COMPRESSION_THRESHOLD = Integer.getInteger("oak.mongo.compressionThreshold", 1024); DocumentPropertyState(DocumentNodeStore store, String name, String value) { this.store = store; this.name = name; - this.value = value; + + int size = value.getBytes().length; + if (size > DEFAULT_COMPRESSION_THRESHOLD) { + compressedValue = compress(value.getBytes()); + this.value = null; + } else { + this.value = value; + } + } + + private byte[] compress(byte[] value) { + try { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + OutputStream compressionOutputStream = compression.getOutputStream(out); + compressionOutputStream.write(value); + compressionOutputStream.close(); + return out.toByteArray(); + } catch (IOException e) { + LOG.warn("Failed to compress data: ", value); + return value; Review Comment: with now returning the original uncompressed `value` here in exception case, that then likely leads to subsequent issues with uncompressing. It might need more complex logic in exception cases I think. ########## oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyStateTest.java: ########## @@ -38,6 +39,19 @@ public class DocumentPropertyStateTest { private static final int BLOB_SIZE = 16 * 1024; + public static final String TEST = "test"; Review Comment: naming .. but maybe it could indicate what it is used for, eg `TEST_NODE` ? ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyState.java: ########## @@ -116,7 +149,16 @@ public int count() { */ @NotNull String getValue() { - return value; + return value != null ? value : decompress(this.compressedValue); + } + + private String decompress(byte[] value) { + try { + return new String(compression.getInputStream(new ByteArrayInputStream(value)).readAllBytes()); + } catch (IOException e) { + LOG.warn("Failed to decompress data.", value); Review Comment: an IOException on decompress is bad, as that would fail the reading of the value (while the same on compression is not a problem, then it would just not compress). So I'd argue this would justify a `LOG.error` here. problem might then be, that in IOException case this might become a noisy logger .. not sure if we should already consider that now though... but also : I wouldn't log the value itself. ########## oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyStateTest.java: ########## @@ -68,10 +82,10 @@ public void multiValuedBinarySize() throws Exception { for (int i = 0; i < 3; i++) { blobs.add(builder.createBlob(new RandomStream(BLOB_SIZE, i))); } - builder.child("test").setProperty("p", blobs, Type.BINARIES); + builder.child(TEST).setProperty("p", blobs, Type.BINARIES); TestUtils.merge(ns, builder); - PropertyState p = ns.getRoot().getChildNode("test").getProperty("p"); + PropertyState p = ns.getRoot().getChildNode(TEST).getProperty("p"); Review Comment: minor: could also leave this method unchanged as it is unrelated and only cosmetic ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyState.java: ########## @@ -38,24 +43,52 @@ import org.apache.jackrabbit.oak.plugins.memory.StringPropertyState; import org.apache.jackrabbit.oak.plugins.value.Conversions; import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * PropertyState implementation with lazy parsing of the JSOP encoded value. */ final class DocumentPropertyState implements PropertyState { + private static final Logger LOG = LoggerFactory.getLogger(DocumentPropertyState.class); + private final DocumentNodeStore store; private final String name; private final String value; private PropertyState parsed; + private byte[] compressedValue; + private final Compression compression = Compression.GZIP; + + private static final int DEFAULT_COMPRESSION_THRESHOLD = Integer.getInteger("oak.mongo.compressionThreshold", 1024); DocumentPropertyState(DocumentNodeStore store, String name, String value) { this.store = store; this.name = name; - this.value = value; + + int size = value.getBytes().length; + if (size > DEFAULT_COMPRESSION_THRESHOLD) { + compressedValue = compress(value.getBytes()); Review Comment: ```suggestion compressedValue = compress(value.getBytes()); ``` formatting ########## oak-store-document/src/test/java/org/apache/jackrabbit/oak/plugins/document/DocumentPropertyStateTest.java: ########## @@ -38,6 +39,19 @@ public class DocumentPropertyStateTest { private static final int BLOB_SIZE = 16 * 1024; + public static final String TEST = "test"; + public static final String STRING_HUGEVALUE = "dummyalgjalegaafdajflalsdddkajf;kdfjakdfjadlsfjalkdsfjakldsfjkladsfjalkdsfjadlk;" + Review Comment: would org.apache.commons.lang3.RandomStringUtils (maybe using the variant that accepts a `Random` that can be seeded with a fixed value) be a useful alternative for this hard coded string? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@jackrabbit.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org