This is an automated email from the ASF dual-hosted git repository. yuzhaojing pushed a commit to branch release-0.12.1-rc1 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit c9dd84088e0def31655c794206bc89a9a1403e57 Author: Sagar Sumit <[email protected]> AuthorDate: Wed Sep 28 22:34:04 2022 +0530 [HUDI-4687] Avoid setAccessible which breaks strong encapsulation (#6657) Use JOL GraphLayout for estimating deep size. --- hudi-common/pom.xml | 5 + .../hudi/common/util/ObjectSizeCalculator.java | 321 +-------------------- .../hudi/common/util/TestObjectSizeCalculator.java | 102 +++++++ .../org/apache/hudi/integ/ITTestHoodieSanity.java | 4 +- packaging/hudi-flink-bundle/pom.xml | 5 + packaging/hudi-hadoop-mr-bundle/pom.xml | 5 + packaging/hudi-hive-sync-bundle/pom.xml | 5 + packaging/hudi-integ-test-bundle/pom.xml | 5 + packaging/hudi-kafka-connect-bundle/pom.xml | 5 + packaging/hudi-presto-bundle/pom.xml | 5 + packaging/hudi-spark-bundle/pom.xml | 5 + packaging/hudi-trino-bundle/pom.xml | 5 + packaging/hudi-utilities-bundle/pom.xml | 5 + pom.xml | 7 + 14 files changed, 175 insertions(+), 309 deletions(-) diff --git a/hudi-common/pom.xml b/hudi-common/pom.xml index 7e7e2a81d4..8b4fa39a62 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -101,6 +101,11 @@ </build> <dependencies> + <dependency> + <groupId>org.openjdk.jol</groupId> + <artifactId>jol-core</artifactId> + </dependency> + <!-- Logging --> <dependency> <groupId>org.apache.logging.log4j</groupId> diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ObjectSizeCalculator.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ObjectSizeCalculator.java index 7e625e8eb4..86f1d9215e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ObjectSizeCalculator.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ObjectSizeCalculator.java @@ -18,33 +18,11 @@ package org.apache.hudi.common.util; -import org.apache.hudi.common.util.jvm.MemoryLayoutSpecification; -import org.apache.hudi.common.util.jvm.HotSpotMemoryLayoutSpecification32bit; -import org.apache.hudi.common.util.jvm.HotSpotMemoryLayoutSpecification64bit; -import org.apache.hudi.common.util.jvm.HotSpotMemoryLayoutSpecification64bitCompressed; -import org.apache.hudi.common.util.jvm.OpenJ9MemoryLayoutSpecification32bit; -import org.apache.hudi.common.util.jvm.OpenJ9MemoryLayoutSpecification64bit; -import org.apache.hudi.common.util.jvm.OpenJ9MemoryLayoutSpecification64bitCompressed; - -import java.lang.management.ManagementFactory; -import java.lang.management.MemoryPoolMXBean; -import java.lang.reflect.Array; -import java.lang.reflect.Field; -import java.lang.reflect.Modifier; -import java.util.ArrayDeque; -import java.util.Arrays; -import java.util.Collections; -import java.util.Deque; -import java.util.IdentityHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.Set; +import org.openjdk.jol.info.GraphLayout; /** * Contains utility methods for calculating the memory usage of objects. It only works on the HotSpot and OpenJ9 JVMs, and infers - * the actual memory layout (32 bit vs. 64 bit word size, compressed object pointers vs. uncompressed) from best + * the actual memory layout (32 bit vs. 64 bit word size, compressed object pointers vs. uncompressed) from the best * available indicators. It can reliably detect a 32 bit vs. 64 bit JVM. It can only make an educated guess at whether * compressed OOPs are used, though; specifically, it knows what the JVM's default choice of OOP compression would be * based on HotSpot version and maximum heap sizes, but if the choice is explicitly overridden with the @@ -54,14 +32,9 @@ import java.util.Set; * @author Attila Szegedi */ public class ObjectSizeCalculator { - private static class CurrentLayout { - - private static final MemoryLayoutSpecification SPEC = getEffectiveMemoryLayoutSpecification(); - } - /** * Given an object, returns the total allocated size, in bytes, of the object and all other objects reachable from it. - * Attempts to to detect the current JVM memory layout, but may fail with {@link UnsupportedOperationException}; + * Attempts to detect the current JVM memory layout, but may fail with {@link UnsupportedOperationException}; * * @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do anything special, it * measures the size of all objects reachable through it (which will include its class loader, and by @@ -71,282 +44,16 @@ public class ObjectSizeCalculator { * @throws UnsupportedOperationException if the current vm memory layout cannot be detected. */ public static long getObjectSize(Object obj) throws UnsupportedOperationException { - return obj == null ? 0 : new ObjectSizeCalculator(CurrentLayout.SPEC).calculateObjectSize(obj); - } - - // Fixed object header size for arrays. - private final int arrayHeaderSize; - // Fixed object header size for non-array objects. - private final int objectHeaderSize; - // Padding for the object size - if the object size is not an exact multiple - // of this, it is padded to the next multiple. - private final int objectPadding; - // Size of reference (pointer) fields. - private final int referenceSize; - // Padding for the fields of superclass before fields of subclasses are - // added. - private final int superclassFieldPadding; - - private final Map<Class<?>, ClassSizeInfo> classSizeInfos = new IdentityHashMap<>(); - - private final Set<Object> alreadyVisited = Collections.newSetFromMap(new IdentityHashMap<>()); - private final Deque<Object> pending = new ArrayDeque<>(64); - private long size; - - /** - * Creates an object size calculator that can calculate object sizes for a given {@code memoryLayoutSpecification}. - * - * @param memoryLayoutSpecification a description of the JVM memory layout. - */ - public ObjectSizeCalculator(MemoryLayoutSpecification memoryLayoutSpecification) { - Objects.requireNonNull(memoryLayoutSpecification); - arrayHeaderSize = memoryLayoutSpecification.getArrayHeaderSize(); - objectHeaderSize = memoryLayoutSpecification.getObjectHeaderSize(); - objectPadding = memoryLayoutSpecification.getObjectPadding(); - referenceSize = memoryLayoutSpecification.getReferenceSize(); - superclassFieldPadding = memoryLayoutSpecification.getSuperclassFieldPadding(); - } - - /** - * Given an object, returns the total allocated size, in bytes, of the object and all other objects reachable from it. - * - * @param obj the object; can be null. Passing in a {@link java.lang.Class} object doesn't do anything special, it - * measures the size of all objects reachable through it (which will include its class loader, and by - * extension, all other Class objects loaded by the same loader, and all the parent class loaders). It doesn't - * provide the size of the static fields in the JVM class that the Class object represents. - * @return the total allocated size of the object and all other objects it retains. - */ - public synchronized long calculateObjectSize(Object obj) { - // Breadth-first traversal instead of naive depth-first with recursive - // implementation, so we don't blow the stack traversing long linked lists. - try { - for (;;) { - visit(obj); - if (pending.isEmpty()) { - return size; - } - obj = pending.removeFirst(); - } - } finally { - alreadyVisited.clear(); - pending.clear(); - size = 0; - } - } - - private ClassSizeInfo getClassSizeInfo(final Class<?> clazz) { - ClassSizeInfo csi = classSizeInfos.get(clazz); - if (csi == null) { - csi = new ClassSizeInfo(clazz); - classSizeInfos.put(clazz, csi); - } - return csi; - } - - private void visit(Object obj) { - if (alreadyVisited.contains(obj)) { - return; - } - final Class<?> clazz = obj.getClass(); - if (clazz == ArrayElementsVisitor.class) { - ((ArrayElementsVisitor) obj).visit(this); - } else { - alreadyVisited.add(obj); - if (clazz.isArray()) { - visitArray(obj); - } else { - getClassSizeInfo(clazz).visit(obj, this); - } - } - } - - private void visitArray(Object array) { - final Class<?> componentType = array.getClass().getComponentType(); - final int length = Array.getLength(array); - if (componentType.isPrimitive()) { - increaseByArraySize(length, getPrimitiveFieldSize(componentType)); - } else { - increaseByArraySize(length, referenceSize); - // If we didn't use an ArrayElementsVisitor, we would be enqueueing every - // element of the array here instead. For large arrays, it would - // tremendously enlarge the queue. In essence, we're compressing it into - // a small command object instead. This is different than immediately - // visiting the elements, as their visiting is scheduled for the end of - // the current queue. - switch (length) { - case 0: { - break; - } - case 1: { - enqueue(Array.get(array, 0)); - break; - } - default: { - enqueue(new ArrayElementsVisitor((Object[]) array)); - } - } - } - } - - private void increaseByArraySize(int length, long elementSize) { - increaseSize(roundTo(arrayHeaderSize + length * elementSize, objectPadding)); - } - - private static class ArrayElementsVisitor { - - private final Object[] array; - - ArrayElementsVisitor(Object[] array) { - this.array = array; - } - - public void visit(ObjectSizeCalculator calc) { - for (Object elem : array) { - if (elem != null) { - calc.visit(elem); - } - } - } - } - - void enqueue(Object obj) { - if (obj != null) { - pending.addLast(obj); - } - } - - void increaseSize(long objectSize) { - size += objectSize; - } - - static long roundTo(long x, int multiple) { - return ((x + multiple - 1) / multiple) * multiple; - } - - private class ClassSizeInfo { - - // Padded fields + header size - private final long objectSize; - // Only the fields size - used to calculate the subclasses' memory - // footprint. - private final long fieldsSize; - private final Field[] referenceFields; - - public ClassSizeInfo(Class<?> clazz) { - long fieldsSize = 0; - final List<Field> referenceFields = new LinkedList<>(); - for (Field f : clazz.getDeclaredFields()) { - if (Modifier.isStatic(f.getModifiers())) { - continue; - } - final Class<?> type = f.getType(); - if (type.isPrimitive()) { - fieldsSize += getPrimitiveFieldSize(type); - } else { - f.setAccessible(true); - referenceFields.add(f); - fieldsSize += referenceSize; - } - } - final Class<?> superClass = clazz.getSuperclass(); - if (superClass != null) { - final ClassSizeInfo superClassInfo = getClassSizeInfo(superClass); - fieldsSize += roundTo(superClassInfo.fieldsSize, superclassFieldPadding); - referenceFields.addAll(Arrays.asList(superClassInfo.referenceFields)); - } - this.fieldsSize = fieldsSize; - this.objectSize = roundTo(objectHeaderSize + fieldsSize, objectPadding); - this.referenceFields = referenceFields.toArray(new Field[referenceFields.size()]); - } - - void visit(Object obj, ObjectSizeCalculator calc) { - calc.increaseSize(objectSize); - enqueueReferencedObjects(obj, calc); - } - - public void enqueueReferencedObjects(Object obj, ObjectSizeCalculator calc) { - for (Field f : referenceFields) { - try { - calc.enqueue(f.get(obj)); - } catch (IllegalAccessException e) { - throw new AssertionError("Unexpected denial of access to " + f, e); - } - } - } - } - - private static long getPrimitiveFieldSize(Class<?> type) { - if (type == boolean.class || type == byte.class) { - return 1; - } - if (type == char.class || type == short.class) { - return 2; - } - if (type == int.class || type == float.class) { - return 4; - } - if (type == long.class || type == double.class) { - return 8; - } - throw new AssertionError("Encountered unexpected primitive type " + type.getName()); - } - - static MemoryLayoutSpecification getEffectiveMemoryLayoutSpecification() { - final String vmName = System.getProperty("java.vm.name"); - if (vmName == null || !(vmName.startsWith("Java HotSpot(TM) ") || vmName.startsWith("OpenJDK") - || vmName.startsWith("TwitterJDK") || vmName.startsWith("Eclipse OpenJ9"))) { - throw new UnsupportedOperationException("ObjectSizeCalculator only supported on HotSpot or Eclipse OpenJ9 VMs"); - } - - final String strVmVersion = System.getProperty("java.vm.version"); - // Support for OpenJ9 JVM - if (strVmVersion.startsWith("openj9")) { - final String dataModel = System.getProperty("sun.arch.data.model"); - if ("32".equals(dataModel)) { - // Running with 32-bit data model - return new OpenJ9MemoryLayoutSpecification32bit(); - } else if (!"64".equals(dataModel)) { - throw new UnsupportedOperationException( - "Unrecognized value '" + dataModel + "' of sun.arch.data.model system property"); - } - - long maxMemory = 0; - for (MemoryPoolMXBean mp : ManagementFactory.getMemoryPoolMXBeans()) { - maxMemory += mp.getUsage().getMax(); - } - if (maxMemory < 57L * 1024 * 1024 * 1024) { - // OpenJ9 use compressed references below 57GB of RAM total - return new OpenJ9MemoryLayoutSpecification64bitCompressed(); - } else { - // it's a 64-bit uncompressed references object model - return new OpenJ9MemoryLayoutSpecification64bit(); - } - } else { - // Support for HotSpot JVM - final String dataModel = System.getProperty("sun.arch.data.model"); - if ("32".equals(dataModel)) { - // Running with 32-bit data model - return new HotSpotMemoryLayoutSpecification32bit(); - } else if (!"64".equals(dataModel)) { - throw new UnsupportedOperationException( - "Unrecognized value '" + dataModel + "' of sun.arch.data.model system property"); - } - - final int vmVersion = Integer.parseInt(strVmVersion.substring(0, strVmVersion.indexOf('.'))); - if (vmVersion >= 17) { - long maxMemory = 0; - for (MemoryPoolMXBean mp : ManagementFactory.getMemoryPoolMXBeans()) { - maxMemory += mp.getUsage().getMax(); - } - if (maxMemory < 30L * 1024 * 1024 * 1024) { - // HotSpot 17.0 and above use compressed OOPs below 30GB of RAM total - // for all memory pools (yes, including code cache). - return new HotSpotMemoryLayoutSpecification64bitCompressed(); - } - } - - // In other cases, it's a 64-bit uncompressed OOPs object model - return new HotSpotMemoryLayoutSpecification64bit(); - } + // JDK versions 16 or later enforce strong encapsulation and block illegal reflective access. + // In effect, we cannot calculate object size by deep reflection and invoking `setAccessible` on a field, + // especially when the `isAccessible` is false. More details in JEP 403. While integrating Hudi with other + // software packages that compile against JDK 16 or later (e.g. Trino), the IllegalAccessException will be thrown. + // In that case, we use Java Object Layout (JOL) to estimate the object size. + // + // NOTE: We cannot get the object size base on the amount of byte serialized because there is no guarantee + // that the incoming object is serializable. We could have used Java's Instrumentation API, but it + // needs an instrumentation agent that can be hooked to the JVM. In lieu of that, we are using JOL. + // GraphLayout gives the deep size of an object, including the size of objects that are referenced from the given object. + return obj == null ? 0 : GraphLayout.parseInstance(obj).totalSize(); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java new file mode 100644 index 0000000000..712f4b85f8 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestObjectSizeCalculator.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.util; + +import org.apache.hudi.common.model.HoodieRecord; + +import org.apache.avro.Schema; +import org.junit.jupiter.api.Test; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +import static org.apache.hudi.common.util.ObjectSizeCalculator.getObjectSize; +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class TestObjectSizeCalculator { + + @Test + public void testGetObjectSize() { + EmptyClass emptyClass = new EmptyClass(); + StringClass stringClass = new StringClass(); + PayloadClass payloadClass = new PayloadClass(); + String emptyString = ""; + String string = "hello"; + String[] stringArray = {emptyString, string, " world"}; + String[] anotherStringArray = new String[100]; + List<String> stringList = new ArrayList<>(); + StringBuilder stringBuilder = new StringBuilder(100); + int maxIntPrimitive = Integer.MAX_VALUE; + int minIntPrimitive = Integer.MIN_VALUE; + Integer maxInteger = Integer.MAX_VALUE; + Integer minInteger = Integer.MIN_VALUE; + long zeroLong = 0L; + double zeroDouble = 0.0; + boolean booleanField = true; + Object object = new Object(); + String name = "Alice Bob"; + Person person = new Person(name); + + assertEquals(40, getObjectSize(emptyString)); + assertEquals(56, getObjectSize(string)); + assertEquals(184, getObjectSize(stringArray)); + assertEquals(416, getObjectSize(anotherStringArray)); + assertEquals(40, getObjectSize(stringList)); + assertEquals(240, getObjectSize(stringBuilder)); + assertEquals(16, getObjectSize(maxIntPrimitive)); + assertEquals(16, getObjectSize(minIntPrimitive)); + assertEquals(16, getObjectSize(maxInteger)); + assertEquals(16, getObjectSize(minInteger)); + assertEquals(24, getObjectSize(zeroLong)); + assertEquals(24, getObjectSize(zeroDouble)); + assertEquals(16, getObjectSize(booleanField)); + assertEquals(80, getObjectSize(DayOfWeek.TUESDAY)); + assertEquals(16, getObjectSize(object)); + assertEquals(32, getObjectSize(emptyClass)); + assertEquals(40, getObjectSize(stringClass)); + assertEquals(40, getObjectSize(payloadClass)); + assertEquals(1240, getObjectSize(Schema.create(Schema.Type.STRING))); + assertEquals(104, getObjectSize(person)); + } + + class EmptyClass { + } + + class StringClass { + private String s; + } + + class PayloadClass implements Serializable { + private HoodieRecord record; + } + + class Person { + private String name; + + public Person(String name) { + this.name = name; + } + } + + public enum DayOfWeek { + MONDAY, TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY, SUNDAY + } +} diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java index e432f9dc42..40827c650a 100644 --- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java +++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieSanity.java @@ -185,12 +185,12 @@ public class ITTestHoodieSanity extends ITTestBase { // Ensure row count is 80 (without duplicates) (100 - 20 deleted) stdOutErr = executeHiveCommand("select count(1) from " + snapshotTableName); - assertEquals(80, Integer.parseInt(stdOutErr.getLeft().trim()), + assertEquals(80, Integer.parseInt(stdOutErr.getLeft().substring(stdOutErr.getLeft().lastIndexOf("\n")).trim()), "Expecting 80 rows to be present in the snapshot table"); if (roTableName.isPresent()) { stdOutErr = executeHiveCommand("select count(1) from " + roTableName.get()); - assertEquals(80, Integer.parseInt(stdOutErr.getLeft().trim()), + assertEquals(80, Integer.parseInt(stdOutErr.getLeft().substring(stdOutErr.getLeft().lastIndexOf("\n")).trim()), "Expecting 80 rows to be present in the snapshot table"); } diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml index 2b90f139b9..f890b65b00 100644 --- a/packaging/hudi-flink-bundle/pom.xml +++ b/packaging/hudi-flink-bundle/pom.xml @@ -163,6 +163,7 @@ <include>org.apache.htrace:htrace-core4</include> <include>commons-codec:commons-codec</include> <include>commons-io:commons-io</include> + <include>org.openjdk.jol:jol-core</include> </includes> </artifactSet> <relocations> @@ -222,6 +223,10 @@ <pattern>com.fasterxml.jackson.</pattern> <shadedPattern>${flink.bundle.shade.prefix}com.fasterxml.jackson.</shadedPattern> </relocation> + <relocation> + <pattern>org.openjdk.jol.</pattern> + <shadedPattern>org.apache.hudi.org.openjdk.jol.</shadedPattern> + </relocation> <!-- The classes below in org.apache.hadoop.metrics2 package come from hbase-hadoop-compat and hbase-hadoop2-compat, which have to be shaded one by one, instead of shading all classes under org.apache.hadoop.metrics2 including ones diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml index 2f3a7a7837..bb20b5691a 100644 --- a/packaging/hudi-hadoop-mr-bundle/pom.xml +++ b/packaging/hudi-hadoop-mr-bundle/pom.xml @@ -90,6 +90,7 @@ <include>org.apache.htrace:htrace-core4</include> <include>com.yammer.metrics:metrics-core</include> <include>commons-io:commons-io</include> + <include>org.openjdk.jol:jol-core</include> </includes> </artifactSet> <relocations> @@ -144,6 +145,10 @@ <pattern>com.google.common.</pattern> <shadedPattern>org.apache.hudi.com.google.common.</shadedPattern> </relocation> + <relocation> + <pattern>org.openjdk.jol.</pattern> + <shadedPattern>org.apache.hudi.org.openjdk.jol.</shadedPattern> + </relocation> <!-- The classes below in org.apache.hadoop.metrics2 package come from hbase-hadoop-compat and hbase-hadoop2-compat, which have to be shaded one by one, instead of shading all classes under org.apache.hadoop.metrics2 including ones diff --git a/packaging/hudi-hive-sync-bundle/pom.xml b/packaging/hudi-hive-sync-bundle/pom.xml index 3bff950e24..87ae223a92 100644 --- a/packaging/hudi-hive-sync-bundle/pom.xml +++ b/packaging/hudi-hive-sync-bundle/pom.xml @@ -90,6 +90,7 @@ <include>org.objenesis:objenesis</include> <include>com.esotericsoftware:minlog</include> <include>commons-io:commons-io</include> + <include>org.openjdk.jol:jol-core</include> </includes> </artifactSet> <relocations> @@ -124,6 +125,10 @@ <pattern>org.apache.htrace.</pattern> <shadedPattern>org.apache.hudi.org.apache.htrace.</shadedPattern> </relocation> + <relocation> + <pattern>org.openjdk.jol.</pattern> + <shadedPattern>org.apache.hudi.org.openjdk.jol.</shadedPattern> + </relocation> <!-- The classes below in org.apache.hadoop.metrics2 package come from hbase-hadoop-compat and hbase-hadoop2-compat, which have to be shaded one by one, instead of shading all classes under org.apache.hadoop.metrics2 including ones diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml index 5bee479997..572a5daa79 100644 --- a/packaging/hudi-integ-test-bundle/pom.xml +++ b/packaging/hudi-integ-test-bundle/pom.xml @@ -184,6 +184,7 @@ <include>io.prometheus:simpleclient_dropwizard</include> <include>io.prometheus:simpleclient_pushgateway</include> <include>io.prometheus:simpleclient_common</include> + <include>org.openjdk.jol:jol-core</include> </includes> </artifactSet> <relocations> @@ -309,6 +310,10 @@ <pattern>org.apache.parquet.avro.</pattern> <shadedPattern>org.apache.hudi.org.apache.parquet.avro.</shadedPattern> </relocation> + <relocation> + <pattern>org.openjdk.jol.</pattern> + <shadedPattern>org.apache.hudi.org.openjdk.jol.</shadedPattern> + </relocation> <!-- The classes below in org.apache.hadoop.metrics2 package come from hbase-hadoop-compat and hbase-hadoop2-compat, which have to be shaded one by one, instead of shading all classes under org.apache.hadoop.metrics2 including ones diff --git a/packaging/hudi-kafka-connect-bundle/pom.xml b/packaging/hudi-kafka-connect-bundle/pom.xml index 6868bd15ed..4a1982e70e 100644 --- a/packaging/hudi-kafka-connect-bundle/pom.xml +++ b/packaging/hudi-kafka-connect-bundle/pom.xml @@ -133,6 +133,7 @@ <include>org.apache.htrace:htrace-core4</include> <include>org.scala-lang:*</include> <include>commons-io:commons-io</include> + <include>org.openjdk.jol:jol-core</include> </includes> </artifactSet> <relocations> @@ -174,6 +175,10 @@ <pattern>org.apache.htrace.</pattern> <shadedPattern>org.apache.hudi.org.apache.htrace.</shadedPattern> </relocation> + <relocation> + <pattern>org.openjdk.jol.</pattern> + <shadedPattern>org.apache.hudi.org.openjdk.jol.</shadedPattern> + </relocation> <!-- The classes below in org.apache.hadoop.metrics2 package come from hbase-hadoop-compat and hbase-hadoop2-compat, which have to be shaded one by one, instead of shading all classes under org.apache.hadoop.metrics2 including ones diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml index d005896133..332fdd05f2 100644 --- a/packaging/hudi-presto-bundle/pom.xml +++ b/packaging/hudi-presto-bundle/pom.xml @@ -94,6 +94,7 @@ <include>commons-io:commons-io</include> <include>commons-lang:commons-lang</include> <include>com.google.protobuf:protobuf-java</include> + <include>org.openjdk.jol:jol-core</include> </includes> </artifactSet> <relocations> @@ -164,6 +165,10 @@ <pattern>org.apache.parquet.avro.</pattern> <shadedPattern>${presto.bundle.bootstrap.shade.prefix}org.apache.parquet.avro.</shadedPattern> </relocation> + <relocation> + <pattern>org.openjdk.jol.</pattern> + <shadedPattern>org.apache.hudi.org.openjdk.jol.</shadedPattern> + </relocation> <!-- The classes below in org.apache.hadoop.metrics2 package come from hbase-hadoop-compat and hbase-hadoop2-compat, which have to be shaded one by one, instead of shading all classes under org.apache.hadoop.metrics2 including ones diff --git a/packaging/hudi-spark-bundle/pom.xml b/packaging/hudi-spark-bundle/pom.xml index db430058de..d21842ddb0 100644 --- a/packaging/hudi-spark-bundle/pom.xml +++ b/packaging/hudi-spark-bundle/pom.xml @@ -135,6 +135,7 @@ <include>org.apache.curator:curator-recipes</include> <include>commons-codec:commons-codec</include> <include>commons-io:commons-io</include> + <include>org.openjdk.jol:jol-core</include> </includes> </artifactSet> <relocations> @@ -220,6 +221,10 @@ <pattern>com.google.common.</pattern> <shadedPattern>org.apache.hudi.com.google.common.</shadedPattern> </relocation> + <relocation> + <pattern>org.openjdk.jol.</pattern> + <shadedPattern>org.apache.hudi.org.openjdk.jol.</shadedPattern> + </relocation> <!-- TODO: Revisit GH ISSUE #533 & PR#633--> <!-- The classes below in org.apache.hadoop.metrics2 package come from hbase-hadoop-compat and hbase-hadoop2-compat, which have to be shaded one by one, diff --git a/packaging/hudi-trino-bundle/pom.xml b/packaging/hudi-trino-bundle/pom.xml index acca9e86ca..e378511fd2 100644 --- a/packaging/hudi-trino-bundle/pom.xml +++ b/packaging/hudi-trino-bundle/pom.xml @@ -94,6 +94,7 @@ <include>commons-lang:commons-lang</include> <include>commons-io:commons-io</include> <include>com.google.protobuf:protobuf-java</include> + <include>org.openjdk.jol:jol-core</include> </includes> </artifactSet> <relocations> @@ -156,6 +157,10 @@ <pattern>com.google.protobuf.</pattern> <shadedPattern>${trino.bundle.bootstrap.shade.prefix}com.google.protobuf.</shadedPattern> </relocation> + <relocation> + <pattern>org.openjdk.jol.</pattern> + <shadedPattern>org.apache.hudi.org.openjdk.jol.</shadedPattern> + </relocation> <!-- The classes below in org.apache.hadoop.metrics2 package come from hbase-hadoop-compat and hbase-hadoop2-compat, which have to be shaded one by one, instead of shading all classes under org.apache.hadoop.metrics2 including ones diff --git a/packaging/hudi-utilities-bundle/pom.xml b/packaging/hudi-utilities-bundle/pom.xml index 6f53aec314..86d3f9f5b9 100644 --- a/packaging/hudi-utilities-bundle/pom.xml +++ b/packaging/hudi-utilities-bundle/pom.xml @@ -167,6 +167,7 @@ <include>org.apache.curator:curator-recipes</include> <include>commons-codec:commons-codec</include> <include>commons-io:commons-io</include> + <include>org.openjdk.jol:jol-core</include> </includes> </artifactSet> <relocations> @@ -244,6 +245,10 @@ <pattern>org.eclipse.jetty.</pattern> <shadedPattern>org.apache.hudi.org.eclipse.jetty.</shadedPattern> </relocation> + <relocation> + <pattern>org.openjdk.jol.</pattern> + <shadedPattern>org.apache.hudi.org.openjdk.jol.</shadedPattern> + </relocation> <!-- The classes below in org.apache.hadoop.metrics2 package come from hbase-hadoop-compat and hbase-hadoop2-compat, which have to be shaded one by one, instead of shading all classes under org.apache.hadoop.metrics2 including ones diff --git a/pom.xml b/pom.xml index 26a07130ba..7f591f8d3b 100644 --- a/pom.xml +++ b/pom.xml @@ -197,6 +197,7 @@ <protoc.version>3.21.5</protoc.version> <dynamodb.lockclient.version>1.1.0</dynamodb.lockclient.version> <zookeeper.version>3.5.7</zookeeper.version> + <openjdk.jol.version>0.16</openjdk.jol.version> <dynamodb-local.port>8000</dynamodb-local.port> <dynamodb-local.endpoint>http://localhost:${dynamodb-local.port}</dynamodb-local.endpoint> <springboot.version>2.7.3</springboot.version> @@ -594,6 +595,12 @@ <version>${scala.collection-compat.version}</version> </dependency> + <dependency> + <groupId>org.openjdk.jol</groupId> + <artifactId>jol-core</artifactId> + <version>${openjdk.jol.version}</version> + </dependency> + <!-- Logging --> <!-- NOTE: All the following deps have to have "provided" scope to make sure these are not conflicting w/ implementations that are using Hudi as a library. For ex, all Spark < 3.3 are still relying on Log4j1
