This is an automated email from the ASF dual-hosted git repository. singhpk234 pushed a commit to branch feature/serialize-bound-expression in repository https://gitbox.apache.org/repos/asf/iceberg.git
commit 67f0039c4a004156dc1476ee4512f6c4f2f81f67 Author: Prashant Singh <[email protected]> AuthorDate: Wed Oct 22 17:33:32 2025 -0700 Address review feedback --- .../apache/iceberg/expressions/ExpressionUtil.java | 2 -- .../apache/iceberg/expressions/Expressions.java | 14 ++++---- .../apache/iceberg/expressions/NamedReference.java | 10 +++--- .../iceberg/expressions/ResolvedReference.java | 38 +++++----------------- .../iceberg/expressions/TestResolvedReference.java | 35 ++++++++++---------- open-api/rest-catalog-open-api.yaml | 15 +++++++++ 6 files changed, 53 insertions(+), 61 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index 6450321eda..d3dc00d914 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -232,8 +232,6 @@ public class ExpressionUtil { + ")"; } else if (term instanceof NamedReference) { return ((NamedReference<?>) term).name(); - } else if (term instanceof ResolvedReference) { - return ((ResolvedReference<?>) term).name(); } else if (term instanceof BoundReference) { return ((BoundReference<?>) term).name(); } else { diff --git a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java index 72c17d8570..04f5caa357 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Expressions.java +++ b/api/src/main/java/org/apache/iceberg/expressions/Expressions.java @@ -103,33 +103,33 @@ public class Expressions { } @SuppressWarnings("unchecked") - public static <S, T> UnboundTerm<T> bucket(ResolvedReference<S> resolvedRef, int numBuckets) { + public static <S, T> UnboundTerm<T> bucket(NamedReference<S> resolvedRef, int numBuckets) { Transform<S, T> transform = (Transform<S, T>) Transforms.bucket(numBuckets); return new UnboundTransform<>(resolvedRef, transform); } @SuppressWarnings("unchecked") - public static <S, T> UnboundTerm<T> year(ResolvedReference<S> resolvedRef) { + public static <S, T> UnboundTerm<T> year(NamedReference<S> resolvedRef) { return new UnboundTransform<>(resolvedRef, (Transform<S, T>) Transforms.year()); } @SuppressWarnings("unchecked") - public static <S, T> UnboundTerm<T> month(ResolvedReference<S> resolvedRef) { + public static <S, T> UnboundTerm<T> month(NamedReference<S> resolvedRef) { return new UnboundTransform<>(resolvedRef, (Transform<S, T>) Transforms.month()); } @SuppressWarnings("unchecked") - public static <S, T> UnboundTerm<T> day(ResolvedReference<S> resolvedRef) { + public static <S, T> UnboundTerm<T> day(NamedReference<S> resolvedRef) { return new UnboundTransform<>(resolvedRef, (Transform<S, T>) Transforms.day()); } @SuppressWarnings("unchecked") - public static <S, T> UnboundTerm<T> hour(ResolvedReference<S> resolvedRef) { + public static <S, T> UnboundTerm<T> hour(NamedReference<S> resolvedRef) { return new UnboundTransform<>(resolvedRef, (Transform<S, T>) Transforms.hour()); } @SuppressWarnings("unchecked") - public static <S, T> UnboundTerm<T> truncate(ResolvedReference<S> resolvedRef, int width) { + public static <S, T> UnboundTerm<T> truncate(NamedReference<S> resolvedRef, int width) { Transform<S, T> transform = (Transform<S, T>) Transforms.truncate(width); return new UnboundTransform<>(resolvedRef, transform); } @@ -348,7 +348,7 @@ public class Expressions { * @param <T> the Java type of this reference * @return a named reference */ - public static <T> ResolvedReference<T> ref(String name, int fieldId) { + static <T> ResolvedReference<T> ref(String name, int fieldId) { return new ResolvedReference<>(name, fieldId); } diff --git a/api/src/main/java/org/apache/iceberg/expressions/NamedReference.java b/api/src/main/java/org/apache/iceberg/expressions/NamedReference.java index b435e2bed5..fdc6b700a3 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/NamedReference.java +++ b/api/src/main/java/org/apache/iceberg/expressions/NamedReference.java @@ -38,16 +38,16 @@ public class NamedReference<T> implements UnboundTerm<T>, Reference<T> { @Override public BoundReference<T> bind(Types.StructType struct, boolean caseSensitive) { - ValidationException.check(name() != null, "Name cannot be null"); + ValidationException.check(name != null, "Name cannot be null"); Schema schema = struct.asSchema(); Types.NestedField field = - caseSensitive ? schema.findField(name()) : schema.caseInsensitiveFindField(name()); + caseSensitive ? schema.findField(name) : schema.caseInsensitiveFindField(name); ValidationException.check( - field != null, "Cannot find field '%s' in struct: %s", name(), schema.asStruct()); + field != null, "Cannot find field '%s' in struct: %s", name, schema.asStruct()); - return new BoundReference<>(field, schema.accessorForField(field.fieldId()), name()); + return new BoundReference<>(field, schema.accessorForField(field.fieldId()), name); } @Override @@ -57,6 +57,6 @@ public class NamedReference<T> implements UnboundTerm<T>, Reference<T> { @Override public String toString() { - return String.format("ref(name=\"%s\")", name()); + return String.format("ref(name=\"%s\")", name); } } diff --git a/api/src/main/java/org/apache/iceberg/expressions/ResolvedReference.java b/api/src/main/java/org/apache/iceberg/expressions/ResolvedReference.java index bc206b76a4..d92b2600c5 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ResolvedReference.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ResolvedReference.java @@ -22,6 +22,13 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.exceptions.ValidationException; import org.apache.iceberg.types.Types; +/** + * A reference to a field by ID rather than name. This extends NamedReference because it is a more + * specific in that the name has already been resolved to a field ID. Because the name has been + * resolved, the name is informational. + * + * @param <T> + */ public class ResolvedReference<T> extends NamedReference<T> { private final int fieldId; @@ -39,40 +46,13 @@ public class ResolvedReference<T> extends NamedReference<T> { Schema schema = struct.asSchema(); Types.NestedField field = schema.findField(fieldId); ValidationException.check( - field != null, - "Cannot find field with id '%s' in struct: %s, since we are resolving based on ID", - fieldId, - schema.asStruct()); + field != null, "Cannot find field by id %s in struct: %s", fieldId, schema.asStruct()); return new BoundReference<>(field, schema.accessorForField(field.fieldId()), name()); } - @Override - public NamedReference<T> ref() { - return new NamedReference<>(name()); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - - if (o == null || getClass() != o.getClass()) { - return false; - } - - ResolvedReference<?> that = (ResolvedReference<?>) o; - return fieldId == that.fieldId && name().equals(that.name()); - } - - @Override - public int hashCode() { - return 31 * fieldId + name().hashCode(); - } - @Override public String toString() { - return String.format("ref(name=\"%s\", fieldId=\"%s\")", name(), fieldId); + return String.format("ref(name=\"%s\", id=\"%s\")", name(), fieldId); } } diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestResolvedReference.java b/api/src/test/java/org/apache/iceberg/expressions/TestResolvedReference.java index 9a9d84db1f..661a21429d 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestResolvedReference.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestResolvedReference.java @@ -40,11 +40,12 @@ public class TestResolvedReference { ResolvedReference<Integer> ref4 = new ResolvedReference<>("a", 35); // Equal references - assertThat(ref1).isEqualTo(ref2); - assertThat(ref1.hashCode()).isEqualTo(ref2.hashCode()); + assertThat(ref1.fieldId()).isEqualTo(ref2.fieldId()); + assertThat(ref1.name()).isEqualTo(ref2.name()); // Different names, same fieldId - assertThat(ref1).isNotEqualTo(ref3); + assertThat(ref1.fieldId()).isEqualTo(ref3.fieldId()); + assertThat(ref1.name()).isNotEqualTo(ref3.name()); // Same name, different fieldId assertThat(ref1).isNotEqualTo(ref4); @@ -53,13 +54,12 @@ public class TestResolvedReference { @Test public void testResolvedReferenceBind() { ResolvedReference<Integer> ref = new ResolvedReference<>("a", 34); - BoundTerm<Integer> bound = ref.bind(SCHEMA.asStruct(), true); + BoundReference<Integer> bound = ref.bind(SCHEMA.asStruct(), true); assertThat(bound).isInstanceOf(BoundReference.class); - BoundReference<Integer> boundRef = (BoundReference<Integer>) bound; - assertThat(boundRef.fieldId()).isEqualTo(34); - assertThat(boundRef.name()).isEqualTo("a"); - assertThat(boundRef.type()).isEqualTo(Types.IntegerType.get()); + assertThat(bound.fieldId()).isEqualTo(34); + assertThat(bound.name()).isEqualTo("a"); + assertThat(bound.type()).isEqualTo(Types.IntegerType.get()); } @Test @@ -67,13 +67,13 @@ public class TestResolvedReference { ResolvedReference<Integer> ref = new ResolvedReference<>("A", 34); // Should work regardless of case sensitivity since we use fieldId - BoundTerm<Integer> bound1 = ref.bind(SCHEMA.asStruct(), true); - BoundTerm<Integer> bound2 = ref.bind(SCHEMA.asStruct(), false); + BoundReference<Integer> bound1 = ref.bind(SCHEMA.asStruct(), true); + BoundReference<Integer> bound2 = ref.bind(SCHEMA.asStruct(), false); assertThat(bound1).isInstanceOf(BoundReference.class); assertThat(bound2).isInstanceOf(BoundReference.class); - assertThat(((BoundReference<Integer>) bound1).fieldId()).isEqualTo(34); - assertThat(((BoundReference<Integer>) bound2).fieldId()).isEqualTo(34); + assertThat(bound1.fieldId()).isEqualTo(34); + assertThat(bound2.fieldId()).isEqualTo(34); } @Test @@ -83,7 +83,7 @@ public class TestResolvedReference { assertThatThrownBy(() -> ref.bind(SCHEMA.asStruct(), true)) .isInstanceOf(ValidationException.class) .hasMessageContaining( - "Cannot find field with id '999' in struct: struct<34: a: optional int, 35: s: required string>, since we are resolving based on ID"); + "Cannot find field by id 999 in struct: struct<34: a: optional int, 35: s: required string>"); } @Test @@ -98,19 +98,18 @@ public class TestResolvedReference { public void testResolvedReferenceToString() { ResolvedReference<Integer> ref = new ResolvedReference<>("a", 34); - assertThat(ref.toString()).isEqualTo("ref(name=\"a\", fieldId=\"34\")"); + assertThat(ref.toString()).isEqualTo("ref(name=\"a\", id=\"34\")"); } @Test public void testResolvedReferenceExpressionIntegration() { // Test that ResolvedReference works in expression predicates - Expression expr = Expressions.equal(Expressions.ref("a", 34), 5); + UnboundPredicate<?> expr = Expressions.equal(Expressions.ref("a", 34), 5); assertThat(expr).isInstanceOf(UnboundPredicate.class); - UnboundPredicate<?> predicate = (UnboundPredicate<?>) expr; - assertThat(predicate.term()).isInstanceOf(ResolvedReference.class); + assertThat(expr.term()).isInstanceOf(ResolvedReference.class); - ResolvedReference<?> resolvedRef = (ResolvedReference<?>) predicate.term(); + ResolvedReference<?> resolvedRef = (ResolvedReference<?>) expr.term(); assertThat(resolvedRef.name()).isEqualTo("a"); assertThat(resolvedRef.fieldId()).isEqualTo(34); } diff --git a/open-api/rest-catalog-open-api.yaml b/open-api/rest-catalog-open-api.yaml index e0b524a6d7..a5571ef15a 100644 --- a/open-api/rest-catalog-open-api.yaml +++ b/open-api/rest-catalog-open-api.yaml @@ -2290,10 +2290,25 @@ components: - $ref: '#/components/schemas/TransformTerm' Reference: + oneOf: + - $ref: '#/components/schemas/NamedReference' + - $ref: '#/components/schemas/ResolvedReference' + + NamedReference: type: string example: - "column-name" + ResolvedReference: + allOf: + - $ref: '#/components/schemas/NamedReference' + - type: object + required: + - field-id + properties: + field-id: + type: integer + TransformTerm: type: object required:
