Repository: oozie Updated Branches: refs/heads/master 61c646c33 -> 438ba6df7
OOZIE-3250 Reduce heap waste by reducing duplicate byte[] count (andras.piros) Project: http://git-wip-us.apache.org/repos/asf/oozie/repo Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/438ba6df Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/438ba6df Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/438ba6df Branch: refs/heads/master Commit: 438ba6df78fcecf92553db875c1e4624d4555d1f Parents: 61c646c Author: Andras Piros <andras.pi...@cloudera.com> Authored: Wed May 16 13:47:28 2018 +0200 Committer: Andras Piros <andras.pi...@cloudera.com> Committed: Wed May 16 13:54:42 2018 +0200 ---------------------------------------------------------------------- .../main/java/org/apache/oozie/BinaryBlob.java | 16 +++--- .../main/java/org/apache/oozie/StringBlob.java | 7 +-- .../org/apache/oozie/util/ByteArrayUtils.java | 48 ++++++++++++++++++ .../apache/oozie/util/TestByteArrayUtils.java | 53 ++++++++++++++++++++ release-log.txt | 1 + 5 files changed, 115 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/oozie/blob/438ba6df/core/src/main/java/org/apache/oozie/BinaryBlob.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/BinaryBlob.java b/core/src/main/java/org/apache/oozie/BinaryBlob.java index 69bf67e..36a0e60 100644 --- a/core/src/main/java/org/apache/oozie/BinaryBlob.java +++ b/core/src/main/java/org/apache/oozie/BinaryBlob.java @@ -21,8 +21,10 @@ package org.apache.oozie; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.IOException; + import org.apache.oozie.compression.CodecFactory; import org.apache.oozie.compression.CompressionCodec; +import org.apache.oozie.util.ByteArrayUtils; /** * BinaryBlob to maintain compress and uncompressed data @@ -40,11 +42,11 @@ public class BinaryBlob { */ public BinaryBlob(byte[] byteArray, boolean isUncompressed) { if (isUncompressed) { - this.bytes = byteArray; + this.bytes = ByteArrayUtils.weakIntern(byteArray); this.rawBlob = null; } else { - this.rawBlob = byteArray; + this.rawBlob = ByteArrayUtils.weakIntern(byteArray); } } @@ -54,7 +56,7 @@ public class BinaryBlob { * @param byteArray the byte array */ public void setBytes(byte[] byteArray) { - this.bytes = byteArray; + this.bytes = ByteArrayUtils.weakIntern(byteArray); this.rawBlob = null; } @@ -74,10 +76,10 @@ public class BinaryBlob { DataInputStream dais = new DataInputStream(new ByteArrayInputStream(rawBlob)); CompressionCodec codec = CodecFactory.getDeCompressionCodec(dais); if (codec != null) { - bytes = codec.decompressToBytes(dais); + bytes = ByteArrayUtils.weakIntern(codec.decompressToBytes(dais)); } else { - bytes = rawBlob; + bytes = ByteArrayUtils.weakIntern(rawBlob); } dais.close(); } @@ -104,14 +106,14 @@ public class BinaryBlob { if (CodecFactory.isCompressionEnabled()) { byte[] headerBytes = CodecFactory.getHeaderBytes(); try { - rawBlob = CodecFactory.getCompressionCodec().compressBytes(headerBytes, bytes); + rawBlob = ByteArrayUtils.weakIntern(CodecFactory.getCompressionCodec().compressBytes(headerBytes, bytes)); } catch (IOException ex) { throw new RuntimeException(ex); } } else { - rawBlob = bytes; + rawBlob = ByteArrayUtils.weakIntern(bytes); } return rawBlob; } http://git-wip-us.apache.org/repos/asf/oozie/blob/438ba6df/core/src/main/java/org/apache/oozie/StringBlob.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/StringBlob.java b/core/src/main/java/org/apache/oozie/StringBlob.java index b453f09..6c77601 100644 --- a/core/src/main/java/org/apache/oozie/StringBlob.java +++ b/core/src/main/java/org/apache/oozie/StringBlob.java @@ -24,6 +24,7 @@ import java.io.IOException; import org.apache.oozie.compression.CodecFactory; import org.apache.oozie.compression.CompressionCodec; +import org.apache.oozie.util.ByteArrayUtils; import org.apache.oozie.util.StringUtils; /** @@ -40,7 +41,7 @@ public class StringBlob { * @param byteArray the byte array */ public StringBlob(byte[] byteArray) { - this.rawBlob = byteArray; + this.rawBlob = ByteArrayUtils.weakIntern(byteArray); } /** @@ -109,14 +110,14 @@ public class StringBlob { if (CodecFactory.isCompressionEnabled()) { byte[] bytes = CodecFactory.getHeaderBytes(); try { - rawBlob = CodecFactory.getCompressionCodec().compressString(bytes, string); + rawBlob = ByteArrayUtils.weakIntern(CodecFactory.getCompressionCodec().compressString(bytes, string)); } catch (IOException ex) { throw new RuntimeException(ex); } } else { - rawBlob = string.getBytes(); + rawBlob = ByteArrayUtils.weakIntern(string.getBytes()); } return rawBlob; } http://git-wip-us.apache.org/repos/asf/oozie/blob/438ba6df/core/src/main/java/org/apache/oozie/util/ByteArrayUtils.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/util/ByteArrayUtils.java b/core/src/main/java/org/apache/oozie/util/ByteArrayUtils.java new file mode 100644 index 0000000..1ab59ce --- /dev/null +++ b/core/src/main/java/org/apache/oozie/util/ByteArrayUtils.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.oozie.util; + +import com.google.common.collect.Interner; +import com.google.common.collect.Interners; + +import java.nio.ByteBuffer; + +/** + * Utility methods for working with {@link byte[]} primitive values. + * <p> + * Interning {@code byte[]} instances doesn't seem to take too many resources both in terms of CPU and memory: 10k * 10k random + * {@code byte[]} allocation alone takes around 7.8 seconds, allocation plus interning takes around 8.0 seconds. + */ +public class ByteArrayUtils { + private static final Interner<ByteBuffer> BYTE_BUFFER_INTERNER = Interners.newWeakInterner(); + + /** + * Return the internalized {@code byte[]}, or {@code null} if the given {@code byte[]} is {@code null}. A weak reference remains + * to each {@code byte[]} interned, so these are not prevented from being garbage-collected. + * @param values The {@code byte[]} to intern + * @return The identical {@code byte[]} cached in the JVM's weak {@link Interner}. + */ + public static byte[] weakIntern(final byte[] values) { + if (values == null) { + return values; + } + + return BYTE_BUFFER_INTERNER.intern(ByteBuffer.wrap(values)).array(); + } +} http://git-wip-us.apache.org/repos/asf/oozie/blob/438ba6df/core/src/test/java/org/apache/oozie/util/TestByteArrayUtils.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/oozie/util/TestByteArrayUtils.java b/core/src/test/java/org/apache/oozie/util/TestByteArrayUtils.java new file mode 100644 index 0000000..cdf7cd0 --- /dev/null +++ b/core/src/test/java/org/apache/oozie/util/TestByteArrayUtils.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.oozie.util; + +import org.junit.Assert; +import org.junit.Test; + +public class TestByteArrayUtils { + + @Test + public void testByteArrayInterningGivesSameInstances() { + final int byteArrayCount = 1000; + final int elementCount = 100; + final byte[][] sameContent = new byte[byteArrayCount][]; + + for (int i = 0; i < byteArrayCount; i++) { + final byte[] source = new byte[elementCount]; + sameContent[i] = source; + for (int j = 0; j < elementCount; j++) { + source[j] = (byte) j; + } + } + + for (int i = 1; i < byteArrayCount; i++) { + Assert.assertTrue("copied byte[]s should be another instances", sameContent[i - 1] != sameContent[i]); + } + + final byte[][] interned = new byte[byteArrayCount][]; + for (int i = 0; i < byteArrayCount; i++) { + interned[i] = ByteArrayUtils.weakIntern(sameContent[i]); + } + + for (int i = 1; i < byteArrayCount; i++) { + Assert.assertTrue("weak interned byte[]s should be the same instance", interned[i - 1] == interned[i]); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/oozie/blob/438ba6df/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index fd7bd76..267af2a 100644 --- a/release-log.txt +++ b/release-log.txt @@ -1,5 +1,6 @@ -- Oozie 5.1.0 release (trunk - unreleased) +OOZIE-3250 Reduce heap waste by reducing duplicate byte[] count (andras.piros) OOZIE-3240 Flaky test TestJMSAccessorService#testConnectionRetry (pbacsko via gezapeti) OOZIE-3246 Flaky test TestJMSJobEventListener#testConnectionDrop (pbacsko via gezapeti) OOZIE-3236 Fix flaky test TestHiveActionExecutor#testHiveAction (pbacsko via gezapeti)