This is an automated email from the ASF dual-hosted git repository. chaokunyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push: new 69f9d61c feat(java): row format supports Record types (#2256) 69f9d61c is described below commit 69f9d61c36ad52ef720e02367d0153c705a01e3b Author: Steven Schlansker <stevenschlans...@gmail.com> AuthorDate: Tue May 27 19:47:38 2025 -0700 feat(java): row format supports Record types (#2256) ## What does this PR do? Support Record types in row format Treated as Bean types, except we have to use canonical constructor instead of unsafe field setting Test only runs on Java 17+ ## Does this PR introduce any user-facing change? New type support in java row format - [ ] Does this PR introduce any public API change? - [ ] Does this PR introduce any binary protocol compatibility change? --- .github/workflows/ci.yml | 7 +++ ci/run_ci.sh | 3 +- integration_tests/latest_jdk_tests/pom.xml | 5 ++ .../fury/integration_tests/RecordRowTest.java | 63 ++++++++++++++++++++ .../main/java/org/apache/fury/type/TypeUtils.java | 3 +- .../org/apache/fury/format/encoder/Encoders.java | 1 + .../fury/format/encoder/RowEncoderBuilder.java | 67 ++++++++++++++++------ .../apache/fury/format/row/binary/BinaryUtils.java | 8 +++ .../org/apache/fury/format/type/TypeInference.java | 2 +- 9 files changed, 137 insertions(+), 22 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a0b4357f..bd1e19ae 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,6 +66,13 @@ jobs: run: ./ci/run_ci.sh install_pyfury - name: Run CI with Maven run: ./ci/run_ci.sh java${{ matrix.java-version }} + - name: Upload Test Report + uses: actions/upload-artifact@v4 + if: ${{ !cancelled() }} + with: + name: surefire-reports-${{ matrix.java-version }} + path: "**/target/surefire-reports/**" + openj9: name: Openj9 Java CI runs-on: ubuntu-latest diff --git a/ci/run_ci.sh b/ci/run_ci.sh index 6176a4a5..d1cd90fa 100755 --- a/ci/run_ci.sh +++ b/ci/run_ci.sh @@ -146,10 +146,11 @@ integration_tests() { jdk17_plus_tests() { java -version + export JDK_JAVA_OPTIONS="--add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED" echo "Executing fury java tests" cd "$ROOT/java" set +e - mvn -T10 --batch-mode --no-transfer-progress test install -pl '!fury-format,!fury-testsuite' + mvn -T10 --batch-mode --no-transfer-progress install testcode=$? if [[ $testcode -ne 0 ]]; then exit $testcode diff --git a/integration_tests/latest_jdk_tests/pom.xml b/integration_tests/latest_jdk_tests/pom.xml index a21f5b7b..83a3d2f7 100644 --- a/integration_tests/latest_jdk_tests/pom.xml +++ b/integration_tests/latest_jdk_tests/pom.xml @@ -43,6 +43,11 @@ <artifactId>fury-core</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.fury</groupId> + <artifactId>fury-format</artifactId> + <version>${project.version}</version> + </dependency> <dependency> <groupId>org.apache.fury</groupId> <artifactId>fury-test-core</artifactId> diff --git a/integration_tests/latest_jdk_tests/src/test/java/org/apache/fury/integration_tests/RecordRowTest.java b/integration_tests/latest_jdk_tests/src/test/java/org/apache/fury/integration_tests/RecordRowTest.java new file mode 100644 index 00000000..97733ffb --- /dev/null +++ b/integration_tests/latest_jdk_tests/src/test/java/org/apache/fury/integration_tests/RecordRowTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.fury.integration_tests; + +import java.time.Instant; +import java.time.LocalDate; +import org.apache.fury.format.encoder.Encoders; +import org.apache.fury.format.encoder.RowEncoder; +import org.apache.fury.format.row.binary.BinaryRow; +import org.apache.fury.memory.MemoryBuffer; +import org.apache.fury.memory.MemoryUtils; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class RecordRowTest { + + public record TestRecord(Instant f1, String f2, LocalDate f3) {} + + // Intentionally mis-ordered to ensure record component order is different from sorted field order + public record OuterTestRecord(long f2, long f1, TestRecord f3) {} + + @Test + public void testRecord() { + final TestRecord bean = + new TestRecord(Instant.ofEpochMilli(42), "Luna", LocalDate.ofEpochDay(1234)); + final RowEncoder<TestRecord> encoder = Encoders.bean(TestRecord.class); + final BinaryRow row = encoder.toRow(bean); + final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes()); + row.pointTo(buffer, 0, buffer.size()); + final TestRecord deserializedBean = encoder.fromRow(row); + Assert.assertEquals(deserializedBean, bean); + } + + @Test + public void testNestedRecord() { + final TestRecord nested = + new TestRecord(Instant.ofEpochMilli(43), "Mars", LocalDate.ofEpochDay(5678)); + final OuterTestRecord bean = new OuterTestRecord(12, 34, nested); + final RowEncoder<OuterTestRecord> encoder = Encoders.bean(OuterTestRecord.class); + final BinaryRow row = encoder.toRow(bean); + final MemoryBuffer buffer = MemoryUtils.wrap(row.toBytes()); + row.pointTo(buffer, 0, buffer.size()); + final OuterTestRecord deserializedBean = encoder.fromRow(row); + Assert.assertEquals(deserializedBean, bean); + } +} diff --git a/java/fury-core/src/main/java/org/apache/fury/type/TypeUtils.java b/java/fury-core/src/main/java/org/apache/fury/type/TypeUtils.java index 663654f8..568b5f10 100644 --- a/java/fury-core/src/main/java/org/apache/fury/type/TypeUtils.java +++ b/java/fury-core/src/main/java/org/apache/fury/type/TypeUtils.java @@ -54,6 +54,7 @@ import org.apache.fury.reflect.TypeRef; import org.apache.fury.serializer.NonexistentClass; import org.apache.fury.util.Preconditions; import org.apache.fury.util.StringUtils; +import org.apache.fury.util.record.RecordUtils; /** Type utils for common type inference and extraction. */ @SuppressWarnings({"UnstableApiUsage", "unchecked"}) @@ -598,7 +599,7 @@ public class TypeUtils { public static boolean isBean(TypeRef<?> typeRef, TypeResolutionContext ctx) { Class<?> cls = getRawType(typeRef); - if (ctx.isSynthesizedBeanType(cls)) { + if (ctx.isSynthesizedBeanType(cls) || RecordUtils.isRecord(cls)) { return true; } if (Modifier.isAbstract(cls.getModifiers()) || Modifier.isInterface(cls.getModifiers())) { diff --git a/java/fury-format/src/main/java/org/apache/fury/format/encoder/Encoders.java b/java/fury-format/src/main/java/org/apache/fury/format/encoder/Encoders.java index 3b581fd1..4c8cb839 100644 --- a/java/fury-format/src/main/java/org/apache/fury/format/encoder/Encoders.java +++ b/java/fury-format/src/main/java/org/apache/fury/format/encoder/Encoders.java @@ -136,6 +136,7 @@ public class Encoders { * <li>time related: java.sql.Date, java.sql.Timestamp, java.time.LocalDate, java.time.Instant * <li>Optional and friends: OptionalInt, OptionalLong, OptionalDouble * <li>collection types: only array and java.util.List currently, map support is in progress + * <li>record types * <li>nested java bean * </ul> */ diff --git a/java/fury-format/src/main/java/org/apache/fury/format/encoder/RowEncoderBuilder.java b/java/fury-format/src/main/java/org/apache/fury/format/encoder/RowEncoderBuilder.java index b57f175f..d68b5c7f 100644 --- a/java/fury-format/src/main/java/org/apache/fury/format/encoder/RowEncoderBuilder.java +++ b/java/fury-format/src/main/java/org/apache/fury/format/encoder/RowEncoderBuilder.java @@ -23,6 +23,8 @@ import static org.apache.fury.type.TypeUtils.CLASS_TYPE; import static org.apache.fury.type.TypeUtils.getRawType; import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.List; import java.util.Optional; import java.util.SortedMap; import org.apache.arrow.vector.types.pojo.Field; @@ -54,6 +56,7 @@ import org.apache.fury.type.TypeUtils; import org.apache.fury.util.GraalvmSupport; import org.apache.fury.util.Preconditions; import org.apache.fury.util.StringUtils; +import org.apache.fury.util.record.RecordUtils; /** Expression builder for building jit row encoder class. */ @SuppressWarnings("UnstableApiUsage") @@ -93,7 +96,7 @@ public class RowEncoderBuilder extends BaseBinaryEncoderBuilder { // non-public class is not accessible in other class. clsExpr = new Expression.StaticInvoke( - Class.class, "forName", CLASS_TYPE, false, Literal.ofString(ctx.type(beanClass))); + Class.class, "forName", CLASS_TYPE, false, Literal.ofString(beanClass.getName())); } ctx.addField(Class.class, "beanClass", clsExpr); ctx.addImports(Field.class, Schema.class); @@ -213,37 +216,60 @@ public class RowEncoderBuilder extends BaseBinaryEncoderBuilder { return new Expression.Return( new Expression.Reference("new " + generatedBeanImplName + "(row)")); } - Expression bean = newBean(); int numFields = schema.getFields().size(); + List<String> fieldNames = new ArrayList<>(numFields); + Expression[] values = new Expression[numFields]; + Descriptor[] descriptors = new Descriptor[numFields]; Expression.ListExpression expressions = new Expression.ListExpression(); - expressions.add(bean); // schema field's name must correspond to descriptor's name. for (int i = 0; i < numFields; i++) { Literal ordinal = Literal.ofInt(i); Descriptor d = getDescriptorByFieldName(schema.getFields().get(i).getName()); + fieldNames.add(d.getName()); + descriptors[i] = d; TypeRef<?> fieldType = d.getTypeRef(); + Expression.Variable value = new Expression.Variable(d.getName(), nullValue(fieldType)); + values[i] = value; + expressions.add(value); Expression.Invoke isNullAt = new Expression.Invoke(row, "isNullAt", TypeUtils.PRIMITIVE_BOOLEAN_TYPE, ordinal); - Expression value = - new Expression.Variable( - "decoded" + i, new Expression.Reference("decode" + i + "(row)", fieldType)); - Expression setActionExpr = setFieldValue(bean, d, value); - Expression action; - if (fieldType.getRawType() == Optional.class) { - Expression setEmptyExpr = - setFieldValue(bean, d, new Expression.StaticInvoke(Optional.class, "empty")); - action = new Expression.If(isNullAt, setEmptyExpr, setActionExpr); - } else { - action = new Expression.If(ExpressionUtils.not(isNullAt), setActionExpr); + Expression decode = + new Expression.If( + ExpressionUtils.not(isNullAt), + new Expression.Assign( + value, new Expression.Reference(decodeMethodName(i) + "(row)", fieldType))); + expressions.add(decode); + } + Expression bean; + if (RecordUtils.isRecord(beanClass)) { + int[] map = RecordUtils.buildRecordComponentMapping(beanClass, fieldNames); + Expression[] args = new Expression[numFields]; + for (int i = 0; i < numFields; i++) { + args[i] = values[map[i]]; + } + bean = new Expression.NewInstance(beanType, beanType.getRawType().getName(), args); + } else { + bean = newBean(); + expressions.add(bean); + for (int i = 0; i < values.length; i++) { + expressions.add(setFieldValue(bean, descriptors[i], values[i])); } - expressions.add(action); } expressions.add(new Expression.Return(bean)); return expressions; } + private static Expression nullValue(TypeRef<?> fieldType) { + Class<?> rawType = fieldType.getRawType(); + if (rawType == Optional.class) { + return new Expression.StaticInvoke( + Optional.class, "empty", "", TypeUtils.OPTIONAL_TYPE, false, true); + } + return new Expression.Reference(TypeUtils.defaultValue(rawType), fieldType); + } + private void addDecoderMethods() { Reference row = new Reference(ROOT_ROW_NAME, binaryRowTypeToken, false); int numFields = schema.getFields().size(); @@ -280,10 +306,9 @@ public class RowEncoderBuilder extends BaseBinaryEncoderBuilder { colType, false, ordinal); - final Expression value = - new Expression.Return(deserializeFor(columnValue, fieldType, fieldCtx)); + Expression value = new Expression.Return(deserializeFor(columnValue, fieldType, fieldCtx)); ctx.addMethod( - "decode" + i, + decodeMethodName(i), value.doGenCode(ctx).code(), fieldType.getRawType(), BinaryRow.class, @@ -307,7 +332,7 @@ public class RowEncoderBuilder extends BaseBinaryEncoderBuilder { TypeRef<?> fieldType = d.getTypeRef(); Expression.Reference decodeValue = - new Expression.Reference("decode" + i + "(row)", fieldType); + new Expression.Reference(decodeMethodName(i) + "(row)", fieldType); Expression getterImpl; if (fieldType.isPrimitive()) { getterImpl = new Expression.Return(decodeValue); @@ -345,6 +370,10 @@ public class RowEncoderBuilder extends BaseBinaryEncoderBuilder { return descriptorsMap.get(name); } + private String decodeMethodName(int i) { + return "decode" + i + "_" + schema.getFields().get(i).getName(); + } + @Override protected Expression beanClassExpr() { if (GraalvmSupport.isGraalBuildtime()) { diff --git a/java/fury-format/src/main/java/org/apache/fury/format/row/binary/BinaryUtils.java b/java/fury-format/src/main/java/org/apache/fury/format/row/binary/BinaryUtils.java index 53c47f63..168a5530 100644 --- a/java/fury-format/src/main/java/org/apache/fury/format/row/binary/BinaryUtils.java +++ b/java/fury-format/src/main/java/org/apache/fury/format/row/binary/BinaryUtils.java @@ -49,6 +49,10 @@ public class BinaryUtils { return "getDate"; } else if (TypeUtils.TIMESTAMP_TYPE.equals(type)) { return "getTimestamp"; + } else if (TypeUtils.INSTANT_TYPE.equals(type)) { + return "getInt64"; + } else if (TypeUtils.LOCAL_DATE_TYPE.equals(type)) { + return "getInt32"; } else if (TypeUtils.STRING_TYPE.equals(type)) { return "getString"; } else if (isArray(type)) { @@ -91,6 +95,10 @@ public class BinaryUtils { return TypeUtils.INT_TYPE; } else if (TypeUtils.TIMESTAMP_TYPE.equals(type)) { return TypeUtils.LONG_TYPE; + } else if (TypeUtils.INSTANT_TYPE.equals(type)) { + return TypeUtils.LONG_TYPE; + } else if (TypeUtils.LOCAL_DATE_TYPE.equals(type)) { + return TypeUtils.INT_TYPE; } else if (TypeUtils.STRING_TYPE.equals(type)) { return TypeUtils.STRING_TYPE; } else if (isArray(type)) { diff --git a/java/fury-format/src/main/java/org/apache/fury/format/type/TypeInference.java b/java/fury-format/src/main/java/org/apache/fury/format/type/TypeInference.java index 4659c58d..8a47891e 100644 --- a/java/fury-format/src/main/java/org/apache/fury/format/type/TypeInference.java +++ b/java/fury-format/src/main/java/org/apache/fury/format/type/TypeInference.java @@ -242,7 +242,7 @@ public class TypeInference { TypeResolutionContext newCtx = ctx.appendTypePath(rawType); TypeRef<?> fieldType = descriptor.getTypeRef(); Class<?> rawFieldType = getRawType(fieldType); - if (rawType.isInterface() && rawFieldType.isInterface()) { + if (rawFieldType.isInterface()) { newCtx = newCtx.withSynthesizedBeanType(rawFieldType); } return inferField(n, fieldType, newCtx); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@fury.apache.org For additional commands, e-mail: commits-h...@fury.apache.org