This is an automated email from the ASF dual-hosted git repository. singhpk234 pushed a commit to branch feature/serialize-bound-expression in repository https://gitbox.apache.org/repos/asf/iceberg.git
commit a2499368221549cf6596a2bcd8cda14e03ea0ae6 Author: Prashant Kumar Singh <[email protected]> AuthorDate: Wed Oct 22 19:20:09 2025 +0000 more cleanup --- .../apache/iceberg/expressions/NamedReference.java | 2 + .../iceberg/expressions/UnboundTransform.java | 11 +-- .../iceberg/expressions/ExpressionParser.java | 82 +++++++++++++--- .../TestExpressionParserWithResolvedReference.java | 107 ++++++++++++++++----- 4 files changed, 155 insertions(+), 47 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/NamedReference.java b/api/src/main/java/org/apache/iceberg/expressions/NamedReference.java index c2d83bc4df..b435e2bed5 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/NamedReference.java +++ b/api/src/main/java/org/apache/iceberg/expressions/NamedReference.java @@ -38,6 +38,8 @@ public class NamedReference<T> implements UnboundTerm<T>, Reference<T> { @Override public BoundReference<T> bind(Types.StructType struct, boolean caseSensitive) { + ValidationException.check(name() != null, "Name cannot be null"); + Schema schema = struct.asSchema(); Types.NestedField field = caseSensitive ? schema.findField(name()) : schema.caseInsensitiveFindField(name()); diff --git a/api/src/main/java/org/apache/iceberg/expressions/UnboundTransform.java b/api/src/main/java/org/apache/iceberg/expressions/UnboundTransform.java index 27059d1d21..30f03e4a9e 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/UnboundTransform.java +++ b/api/src/main/java/org/apache/iceberg/expressions/UnboundTransform.java @@ -36,17 +36,13 @@ public class UnboundTransform<S, T> implements UnboundTerm<T>, Term { return ref; } - public NamedReference<S> unboundRef() { - return ref; - } - public Transform<S, T> transform() { return transform; } @Override public BoundTransform<S, T> bind(Types.StructType struct, boolean caseSensitive) { - BoundReference<S> boundRef = (BoundReference<S>) ref.bind(struct, caseSensitive); + BoundReference<S> boundRef = ref.bind(struct, caseSensitive); try { ValidationException.check( @@ -54,11 +50,10 @@ public class UnboundTransform<S, T> implements UnboundTerm<T>, Term { "Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), - ref.toString()); + ref); } catch (IllegalArgumentException e) { throw new ValidationException( - "Cannot bind: %s cannot transform %s values from '%s'", - transform, boundRef.type(), ref.name()); + "Cannot bind: %s cannot transform %s values from '%s'", transform, boundRef.type(), ref); } return new BoundTransform<>(boundRef, transform); diff --git a/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java b/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java index e68a71c4a7..6aea3c6e60 100644 --- a/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java +++ b/core/src/main/java/org/apache/iceberg/expressions/ExpressionParser.java @@ -34,6 +34,7 @@ import org.apache.iceberg.SingleValueParser; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +import org.apache.iceberg.transforms.Transform; import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.JsonUtil; @@ -51,6 +52,11 @@ public class ExpressionParser { private static final String REFERENCE = "reference"; private static final String LITERAL = "literal"; + public enum SerializationMode { + NAMES_ONLY, + WITH_FIELD_IDS + } + private ExpressionParser() {} public static String toJson(Expression expression) { @@ -62,27 +68,45 @@ public class ExpressionParser { return JsonUtil.generate(gen -> toJson(expression, gen), pretty); } + public static String toJson(Expression expression, boolean pretty, SerializationMode mode) { + Preconditions.checkArgument(expression != null, "Invalid expression: null"); + return JsonUtil.generate(gen -> toJson(expression, gen, mode), pretty); + } + + @Deprecated public static String toJson(Expression expression, boolean pretty, boolean includeFieldIds) { Preconditions.checkArgument(expression != null, "Invalid expression: null"); - return JsonUtil.generate(gen -> toJson(expression, gen, includeFieldIds), pretty); + return toJson( + expression, + pretty, + includeFieldIds ? SerializationMode.WITH_FIELD_IDS : SerializationMode.NAMES_ONLY); } public static void toJson(Expression expression, JsonGenerator gen) { - ExpressionVisitors.visit(expression, new JsonGeneratorVisitor(gen, false)); + ExpressionVisitors.visit( + expression, new JsonGeneratorVisitor(gen, SerializationMode.NAMES_ONLY)); + } + + public static void toJson(Expression expression, JsonGenerator gen, SerializationMode mode) { + ExpressionVisitors.visit(expression, new JsonGeneratorVisitor(gen, mode)); } + @Deprecated public static void toJson(Expression expression, JsonGenerator gen, boolean includeFieldIds) { - ExpressionVisitors.visit(expression, new JsonGeneratorVisitor(gen, includeFieldIds)); + toJson( + expression, + gen, + includeFieldIds ? SerializationMode.WITH_FIELD_IDS : SerializationMode.NAMES_ONLY); } private static class JsonGeneratorVisitor extends ExpressionVisitors.CustomOrderExpressionVisitor<Void> { private final JsonGenerator gen; - private final boolean includeFieldIds; + private final SerializationMode mode; - private JsonGeneratorVisitor(JsonGenerator gen, boolean includeFieldIds) { + private JsonGeneratorVisitor(JsonGenerator gen, SerializationMode mode) { this.gen = gen; - this.includeFieldIds = includeFieldIds; + this.mode = mode; } /** @@ -251,7 +275,7 @@ public class ExpressionParser { return; } else if (term instanceof BoundTransform) { BoundTransform<?, ?> transform = (BoundTransform<?, ?>) term; - if (includeFieldIds) { + if (mode == SerializationMode.WITH_FIELD_IDS) { transformWithFieldId( transform.transform().toString(), transform.ref().name(), transform.ref().fieldId()); } else { @@ -260,7 +284,7 @@ public class ExpressionParser { return; } else if (term instanceof BoundReference) { BoundReference<?> ref = (BoundReference<?>) term; - if (includeFieldIds) { + if (mode == SerializationMode.WITH_FIELD_IDS) { referenceWithFieldId(ref.name(), ref.fieldId()); } else { gen.writeString(ref.name()); @@ -295,8 +319,11 @@ public class ExpressionParser { private void referenceWithFieldId(String name, int fieldId) throws IOException { gen.writeStartObject(); - gen.writeStringField("type", "ref"); - gen.writeStringField("name", name); + gen.writeStringField("type", REFERENCE); + // Only write name if not null (support fieldId-only case) + if (name != null) { + gen.writeStringField("name", name); + } gen.writeNumberField("fieldId", fieldId); gen.writeEndObject(); } @@ -451,12 +478,39 @@ public class ExpressionParser { String type = JsonUtil.getString(TYPE, node); switch (type) { case REFERENCE: - return Expressions.ref(JsonUtil.getString(TERM, node)); + // Reference must have a name (via "name" or "term") + // fieldId is optional - if present, creates ResolvedReference, otherwise NamedReference + boolean hasTerm = node.has(TERM); + boolean hasName = node.has("name"); + boolean hasFieldId = node.has("fieldId"); + + String name = + hasName + ? JsonUtil.getString("name", node) + : hasTerm ? JsonUtil.getString(TERM, node) : null; + + if (name == null) { + throw new IllegalArgumentException( + "REFERENCE must have a name (via 'name' or 'term' field): " + node); + } + + if (hasFieldId) { + int fieldId = JsonUtil.getInt("fieldId", node); + return Expressions.ref(name, fieldId); + } else { + return Expressions.ref(name); + } case TRANSFORM: UnboundTerm<T> child = term(JsonUtil.get(TERM, node)); - String transform = JsonUtil.getString(TRANSFORM, node); - return (UnboundTerm<T>) - Expressions.transform(child.ref().name(), Transforms.fromString(transform)); + String transformStr = JsonUtil.getString(TRANSFORM, node); + NamedReference<?> ref; + if (child instanceof NamedReference) { + ref = (NamedReference<?>) child; + } else { + ref = child.ref(); + } + Transform<?, ?> transform = Transforms.fromString(transformStr); + return (UnboundTerm<T>) new UnboundTransform(ref, transform); default: throw new IllegalArgumentException("Cannot parse type as a reference: " + type); } diff --git a/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParserWithResolvedReference.java b/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParserWithResolvedReference.java index ff04145917..bff158a2a1 100644 --- a/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParserWithResolvedReference.java +++ b/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParserWithResolvedReference.java @@ -25,6 +25,7 @@ import static org.assertj.core.api.Assertions.assertThat; import java.math.BigDecimal; import java.util.UUID; import org.apache.iceberg.Schema; +import org.apache.iceberg.expressions.ExpressionParser.SerializationMode; import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Test; @@ -499,8 +500,9 @@ public class TestExpressionParserWithResolvedReference { assertThat(jsonWithoutFieldIds).doesNotContain("fieldId"); // Test serialization with field IDs (new behavior) - String jsonWithFieldIds = ExpressionParser.toJson(boundSimple, true, true); - assertThat(jsonWithFieldIds).contains("\"type\" : \"ref\""); + String jsonWithFieldIds = + ExpressionParser.toJson(boundSimple, true, SerializationMode.WITH_FIELD_IDS); + assertThat(jsonWithFieldIds).contains("\"type\" : \"reference\""); assertThat(jsonWithFieldIds).contains("\"name\" : \"id\""); assertThat(jsonWithFieldIds).contains("\"fieldId\" : 100"); @@ -511,7 +513,8 @@ public class TestExpressionParserWithResolvedReference { Expressions.greaterThan(Expressions.ref("id", 100), 50L)); Expression boundComplex = Binder.bind(STRUCT_TYPE, complexExpr, true); - String complexJsonWithFieldIds = ExpressionParser.toJson(boundComplex, true, true); + String complexJsonWithFieldIds = + ExpressionParser.toJson(boundComplex, true, SerializationMode.WITH_FIELD_IDS); assertThat(complexJsonWithFieldIds).contains("\"fieldId\" : 100"); assertThat(complexJsonWithFieldIds).contains("\"fieldId\" : 101"); assertThat(complexJsonWithFieldIds).contains("\"name\" : \"id\""); @@ -539,15 +542,17 @@ public class TestExpressionParserWithResolvedReference { assertThat(bucketJsonNoFieldIds).doesNotContain("fieldId"); // Test serialization with field IDs (new behavior) - String bucketJsonWithFieldIds = ExpressionParser.toJson(boundBucket, true, true); + String bucketJsonWithFieldIds = + ExpressionParser.toJson(boundBucket, true, SerializationMode.WITH_FIELD_IDS); assertThat(bucketJsonWithFieldIds).contains("\"transform\" : \"bucket[8]\""); - assertThat(bucketJsonWithFieldIds).contains("\"type\" : \"ref\""); + assertThat(bucketJsonWithFieldIds).contains("\"type\" : \"reference\""); assertThat(bucketJsonWithFieldIds).contains("\"name\" : \"id\""); assertThat(bucketJsonWithFieldIds).contains("\"fieldId\" : 100"); - String dayJsonWithFieldIds = ExpressionParser.toJson(boundDay, true, true); + String dayJsonWithFieldIds = + ExpressionParser.toJson(boundDay, true, SerializationMode.WITH_FIELD_IDS); assertThat(dayJsonWithFieldIds).contains("\"transform\" : \"day\""); - assertThat(dayJsonWithFieldIds).contains("\"type\" : \"ref\""); + assertThat(dayJsonWithFieldIds).contains("\"type\" : \"reference\""); assertThat(dayJsonWithFieldIds).contains("\"name\" : \"date\""); assertThat(dayJsonWithFieldIds).contains("\"fieldId\" : 107"); } @@ -568,7 +573,8 @@ public class TestExpressionParserWithResolvedReference { Expression bound = Binder.bind(STRUCT_TYPE, complexExpr, true); // Test serialization with field IDs - String jsonWithFieldIds = ExpressionParser.toJson(bound, true, true); + String jsonWithFieldIds = + ExpressionParser.toJson(bound, true, SerializationMode.WITH_FIELD_IDS); // Verify all field IDs are present assertThat(jsonWithFieldIds).contains("\"fieldId\" : 100"); // id field @@ -588,7 +594,8 @@ public class TestExpressionParserWithResolvedReference { assertThat(jsonWithFieldIds).contains("\"transform\" : \"truncate[4]\""); // Verify ResolvedReference structure for both transforms and direct references - // Count occurrences of "type" : "ref" to ensure all references use ResolvedReference format + // Count occurrences of "type" : "reference" to ensure all references use ResolvedReference + // format long refTypeCount = jsonWithFieldIds .lines() @@ -596,7 +603,7 @@ public class TestExpressionParserWithResolvedReference { line -> { int count = 0; int index = 0; - String pattern = "\"type\" : \"ref\""; + String pattern = "\"type\" : \"reference\""; while ((index = line.indexOf(pattern, index)) != -1) { count++; index += pattern.length(); @@ -610,7 +617,7 @@ public class TestExpressionParserWithResolvedReference { @Test public void testBoundExpressionRoundTripWithResolvedReference() { - // Test that bound expressions with ResolvedReference can be serialized and maintain information + // Test that bound expressions with ResolvedReference can be serialized and parsed back // Create expressions using both ResolvedTransform and ResolvedReference Expression originalExpr = @@ -622,25 +629,75 @@ public class TestExpressionParserWithResolvedReference { Expression bound = Binder.bind(STRUCT_TYPE, originalExpr, true); // Serialize with field IDs - String jsonWithFieldIds = ExpressionParser.toJson(bound, true, true); + String jsonWithFieldIds = + ExpressionParser.toJson(bound, true, SerializationMode.WITH_FIELD_IDS); // Verify the JSON structure contains complete ResolvedReference information - assertThat(jsonWithFieldIds).contains("\"type\" : \"ref\""); + assertThat(jsonWithFieldIds).contains("\"type\" : \"reference\""); assertThat(jsonWithFieldIds).contains("\"fieldId\" : 100"); assertThat(jsonWithFieldIds).contains("\"fieldId\" : 101"); - // Note: The current parser doesn't support parsing ResolvedReference format back to expressions - // This test documents the serialization capability for external systems that need field ID - // information + // Parse back from JSON with field IDs + Expression parsed = ExpressionParser.fromJson(jsonWithFieldIds, SCHEMA); - // Verify that the bound expression maintains the same field IDs after serialization - BoundPredicate<?> boundPred = (BoundPredicate<?>) ((And) bound).left(); - BoundTransform<?, ?> boundTransform = (BoundTransform<?, ?>) boundPred.term(); - assertThat(boundTransform.ref().fieldId()).isEqualTo(100); + // Verify parsed expression uses ResolvedReference + assertThat(parsed).isInstanceOf(And.class); + And parsedAnd = (And) parsed; + + UnboundPredicate<?> leftPred = (UnboundPredicate<?>) parsedAnd.left(); + assertThat(leftPred.term()).isInstanceOf(UnboundTransform.class); + UnboundTransform<?, ?> leftTransform = (UnboundTransform<?, ?>) leftPred.term(); + assertThat(leftTransform.ref()).isInstanceOf(ResolvedReference.class); + assertThat(((ResolvedReference<?>) leftTransform.ref()).fieldId()).isEqualTo(100); + + UnboundPredicate<?> rightPred = (UnboundPredicate<?>) parsedAnd.right(); + assertThat(rightPred.term()).isInstanceOf(ResolvedReference.class); + ResolvedReference<?> rightRef = (ResolvedReference<?>) rightPred.term(); + assertThat(rightRef.fieldId()).isEqualTo(101); + assertThat(rightRef.name()).isEqualTo("data"); + + // Verify that binding the parsed expression produces the same result + Expression parsedBound = Binder.bind(STRUCT_TYPE, parsed, true); + assertThat(parsedBound.toString()).isEqualTo(bound.toString()); + } + + @Test + public void testResolvedReferenceRoundTripWithFieldIds() { + // Test that ResolvedReference expressions with field IDs can be serialized and parsed back + Expression original = Expressions.equal(Expressions.ref("id", 100), 42L); + + // Serialize without binding (preserves ResolvedReference) + String json = ExpressionParser.toJson(original, true); - BoundPredicate<?> boundDataPred = (BoundPredicate<?>) ((And) bound).right(); - BoundReference<?> boundDataRef = (BoundReference<?>) boundDataPred.term(); - assertThat(boundDataRef.fieldId()).isEqualTo(101); + // For unbound expressions, field IDs are not included in JSON by default + // We need to manually create JSON with field IDs to test parsing + String jsonWithFieldIds = + "{\n" + + " \"type\" : \"eq\",\n" + + " \"term\" : {\n" + + " \"type\" : \"reference\",\n" + + " \"term\" : \"id\",\n" + + " \"fieldId\" : 100\n" + + " },\n" + + " \"value\" : 42\n" + + "}"; + + // Parse the JSON with field IDs + Expression parsed = ExpressionParser.fromJson(jsonWithFieldIds, SCHEMA); + + // Verify it creates a ResolvedReference + assertThat(parsed).isInstanceOf(UnboundPredicate.class); + UnboundPredicate<?> predicate = (UnboundPredicate<?>) parsed; + assertThat(predicate.term()).isInstanceOf(ResolvedReference.class); + + ResolvedReference<?> resolvedRef = (ResolvedReference<?>) predicate.term(); + assertThat(resolvedRef.name()).isEqualTo("id"); + assertThat(resolvedRef.fieldId()).isEqualTo(100); + + // Verify equivalence after binding + Expression originalBound = Binder.bind(STRUCT_TYPE, original, true); + Expression parsedBound = Binder.bind(STRUCT_TYPE, parsed, true); + assertThat(parsedBound.toString()).isEqualTo(originalBound.toString()); } @Test @@ -666,7 +723,7 @@ public class TestExpressionParserWithResolvedReference { Expression bound = Binder.bind(STRUCT_TYPE, expr, true); // Serialize with field IDs - String json = ExpressionParser.toJson(bound, true, true); + String json = ExpressionParser.toJson(bound, true, SerializationMode.WITH_FIELD_IDS); // Extract expected field ID from the original ResolvedReference UnboundPredicate<?> unboundPred = (UnboundPredicate<?>) expr; @@ -675,7 +732,7 @@ public class TestExpressionParserWithResolvedReference { // Verify the field ID is preserved in the JSON assertThat(json).contains("\"fieldId\" : " + expectedFieldId); - assertThat(json).contains("\"type\" : \"ref\""); + assertThat(json).contains("\"type\" : \"reference\""); } } }
