This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new a462ed2 API: Add UnboundSortOrder that has no schema (#4360)
a462ed2 is described below
commit a462ed254fe6e8a033379a866c3d759b7788b6c6
Author: Ryan Blue <[email protected]>
AuthorDate: Fri Mar 18 14:04:36 2022 -0700
API: Add UnboundSortOrder that has no schema (#4360)
---
.../main/java/org/apache/iceberg/SortOrder.java | 19 ++-
.../java/org/apache/iceberg/UnboundSortOrder.java | 143 +++++++++++++++++++++
.../java/org/apache/iceberg/MetadataUpdate.java | 8 +-
.../java/org/apache/iceberg/SortOrderParser.java | 59 ++++++++-
.../java/org/apache/iceberg/TableMetadata.java | 18 ++-
.../org/apache/iceberg/rest/RESTSerializers.java | 17 ++-
6 files changed, 231 insertions(+), 33 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/SortOrder.java
b/api/src/main/java/org/apache/iceberg/SortOrder.java
index 595af6d..4d0e3b1 100644
--- a/api/src/main/java/org/apache/iceberg/SortOrder.java
+++ b/api/src/main/java/org/apache/iceberg/SortOrder.java
@@ -135,6 +135,16 @@ public class SortOrder implements Serializable {
return fieldList;
}
+  UnboundSortOrder toUnbound() {
+    // carry over the order ID; every transform is recorded by its string form
+    UnboundSortOrder.Builder unbound = UnboundSortOrder.builder().withOrderId(orderId);
+    for (SortField sortField : fields) {
+      String transformAsString = sortField.transform().toString();
+      unbound.addSortField(transformAsString, sortField.sourceId(), sortField.direction(), sortField.nullOrder());
+    }
+    return unbound.build();
+  }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@@ -244,7 +254,7 @@ public class SortOrder implements Serializable {
return this;
}
- Builder addSortField(Term term, SortDirection direction, NullOrder
nullOrder) {
+ private Builder addSortField(Term term, SortDirection direction, NullOrder
nullOrder) {
Preconditions.checkArgument(term instanceof UnboundTerm, "Term must be
unbound");
// ValidationException is thrown by bind if binding fails so we assume
that boundTerm is correct
BoundTerm<?> boundTerm = ((UnboundTerm<?>) term).bind(schema.asStruct(),
caseSensitive);
@@ -256,18 +266,13 @@ public class SortOrder implements Serializable {
Builder addSortField(String transformAsString, int sourceId, SortDirection
direction, NullOrder nullOrder) {
Types.NestedField column = schema.findField(sourceId);
- Preconditions.checkNotNull(column, "Cannot find source column: %s",
sourceId);
+ ValidationException.check(column != null, "Cannot find source column:
%s", sourceId);
Transform<?, ?> transform = Transforms.fromString(column.type(),
transformAsString);
SortField sortField = new SortField(transform, sourceId, direction,
nullOrder);
fields.add(sortField);
return this;
}
- Builder addSortField(Transform<?, ?> transform, int sourceId,
SortDirection direction, NullOrder nullOrder) {
- fields.add(new SortField(transform, sourceId, direction, nullOrder));
- return this;
- }
-
public SortOrder build() {
SortOrder sortOrder = buildUnchecked();
checkCompatibility(sortOrder, schema);
diff --git a/api/src/main/java/org/apache/iceberg/UnboundSortOrder.java
b/api/src/main/java/org/apache/iceberg/UnboundSortOrder.java
new file mode 100644
index 0000000..32ef18f
--- /dev/null
+++ b/api/src/main/java/org/apache/iceberg/UnboundSortOrder.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.iceberg;
+
+import java.util.Collections;
+import java.util.List;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+
+/** A sort order that is not tied to a schema; it is turned into a {@link SortOrder} by binding. */
+public class UnboundSortOrder {
+  private static final UnboundSortOrder UNSORTED_ORDER = new UnboundSortOrder(0, Collections.emptyList());
+
+  private final int orderId;
+  private final List<UnboundSortField> fields;
+
+  private UnboundSortOrder(int orderId, List<UnboundSortField> fields) {
+    this.orderId = orderId;
+    // keep an unmodifiable snapshot so that reusing the builder cannot change an order it already built
+    this.fields = Collections.unmodifiableList(Lists.newArrayList(fields));
+  }
+
+  /** Binds this order to {@code schema}; the result is validated by {@link SortOrder.Builder#build()}. */
+  public SortOrder bind(Schema schema) {
+    return newBoundBuilder(schema).build();
+  }
+
+  /** Binds this order to {@code schema} but finishes with {@code buildUnchecked()} instead of {@code build()}. */
+  SortOrder bindUnchecked(Schema schema) {
+    return newBoundBuilder(schema).buildUnchecked();
+  }
+
+  /** Starts a {@link SortOrder.Builder} for {@code schema} and copies this order's ID and fields into it. */
+  private SortOrder.Builder newBoundBuilder(Schema schema) {
+    SortOrder.Builder builder = SortOrder.builderFor(schema).withOrderId(orderId);
+    for (UnboundSortField field : fields) {
+      builder.addSortField(field.transformAsString, field.sourceId, field.direction, field.nullOrder);
+    }
+    return builder;
+  }
+
+  int orderId() {
+    return orderId;
+  }
+
+  List<UnboundSortField> fields() {
+    return fields;
+  }
+
+  /**
+   * Creates a new {@link UnboundSortOrder.Builder sort order builder} for unbound sort orders.
+   *
+   * @return a sort order builder
+   */
+  static Builder builder() {
+    return new Builder();
+  }
+
+  /**
+   * A builder used to create {@link UnboundSortOrder unbound sort orders}.
+   * <p>
+   * Call {@link #builder()} to create a new builder.
+   */
+  static class Builder {
+    private final List<UnboundSortField> fields = Lists.newArrayList();
+    private Integer orderId = null;
+
+    private Builder() {
+    }
+
+    Builder withOrderId(int newOrderId) {
+      this.orderId = newOrderId;
+      return this;
+    }
+
+    Builder addSortField(String transformAsString, int sourceId, SortDirection direction, NullOrder nullOrder) {
+      fields.add(new UnboundSortField(transformAsString, sourceId, direction, nullOrder));
+      return this;
+    }
+
+    /** Builds the order; rejects a non-zero ID without fields and ID 0 with fields. */
+    UnboundSortOrder build() {
+      if (fields.isEmpty()) {
+        if (orderId != null && orderId != 0) {
+          throw new IllegalArgumentException("Unsorted order ID must be 0");
+        }
+        return UNSORTED_ORDER;
+      }
+      if (orderId != null && orderId == 0) {
+        throw new IllegalArgumentException("Sort order ID 0 is reserved for unsorted order");
+      }
+      // default ID to 1 as 0 is reserved for unsorted order
+      int actualOrderId = orderId != null ? orderId : 1;
+      return new UnboundSortOrder(actualOrderId, fields);
+    }
+  }
+
+  static class UnboundSortField {
+    private final String transformAsString;
+    private final int sourceId;
+    private final SortDirection direction;
+    private final NullOrder nullOrder;
+
+    private UnboundSortField(String transformAsString, int sourceId, SortDirection direction, NullOrder nullOrder) {
+      this.transformAsString = transformAsString;
+      this.sourceId = sourceId;
+      this.direction = direction;
+      this.nullOrder = nullOrder;
+    }
+
+    public String transformAsString() {
+      return transformAsString;
+    }
+
+    public int sourceId() {
+      return sourceId;
+    }
+
+    public SortDirection direction() {
+      return direction;
+    }
+
+    public NullOrder nullOrder() {
+      return nullOrder;
+    }
+  }
+}
diff --git a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java
b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java
index f5c8e4a..3d39d14 100644
--- a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java
+++ b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java
@@ -139,13 +139,17 @@ public interface MetadataUpdate extends Serializable {
}
class AddSortOrder implements MetadataUpdate {
- private final SortOrder sortOrder;
+ private final UnboundSortOrder sortOrder;
public AddSortOrder(SortOrder sortOrder) {
+ this(sortOrder.toUnbound());
+ }
+
+ public AddSortOrder(UnboundSortOrder sortOrder) {
this.sortOrder = sortOrder;
}
- public SortOrder sortOrder() {
+ public UnboundSortOrder sortOrder() {
return sortOrder;
}
diff --git a/core/src/main/java/org/apache/iceberg/SortOrderParser.java
b/core/src/main/java/org/apache/iceberg/SortOrderParser.java
index a351e3a..002c859 100644
--- a/core/src/main/java/org/apache/iceberg/SortOrderParser.java
+++ b/core/src/main/java/org/apache/iceberg/SortOrderParser.java
@@ -92,23 +92,72 @@ public class SortOrderParser {
generator.writeEndArray();
}
- public static SortOrder fromJson(Schema schema, String json) {
+ public static void toJson(UnboundSortOrder sortOrder, JsonGenerator
generator) throws IOException {
+ generator.writeStartObject(); // the whole unbound order is written as one JSON object
+ generator.writeNumberField(ORDER_ID, sortOrder.orderId());
+ generator.writeFieldName(FIELDS); // name only; the value is the array written by the next call
+ toJsonFields(sortOrder, generator);
+ generator.writeEndObject();
+ }
+
+ public static String toJson(UnboundSortOrder sortOrder) {
+ return toJson(sortOrder, false); // false = do not pretty-print
+ }
+
+ public static String toJson(UnboundSortOrder sortOrder, boolean pretty) {
try {
- return fromJson(schema, JsonUtil.mapper().readValue(json,
JsonNode.class));
+ StringWriter writer = new StringWriter();
+ JsonGenerator generator = JsonUtil.factory().createGenerator(writer);
+ if (pretty) {
+ generator.useDefaultPrettyPrinter();
+ }
+ toJson(sortOrder, generator);
+ generator.flush();
+ return writer.toString();
+
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
+ private static void toJsonFields(UnboundSortOrder sortOrder, JsonGenerator
generator) throws IOException {
+ generator.writeStartArray(); // one array element per sort field, in list order
+ for (UnboundSortOrder.UnboundSortField field : sortOrder.fields()) {
+ generator.writeStartObject();
+ generator.writeStringField(TRANSFORM, field.transformAsString()); // transform is already a string; no schema needed
+ generator.writeNumberField(SOURCE_ID, field.sourceId());
+ generator.writeStringField(DIRECTION, toJson(field.direction())); // converted by this class's toJson helper
+ generator.writeStringField(NULL_ORDER, toJson(field.nullOrder())); // converted by this class's toJson helper
+ generator.writeEndObject();
+ }
+ generator.writeEndArray();
+ }
+
+ public static SortOrder fromJson(Schema schema, String json) {
+ return fromJson(json).bind(schema); // parse to an unbound order first, then bind it to the schema
+ }
+
public static SortOrder fromJson(Schema schema, JsonNode json) {
+ return fromJson(json).bind(schema);
+ }
+
+ public static UnboundSortOrder fromJson(String json) {
+ try {
+ return fromJson(JsonUtil.mapper().readValue(json, JsonNode.class)); // read a JSON tree, then parse the tree
+ } catch (IOException e) {
+ throw new UncheckedIOException(e); // rethrow unchecked, keeping the original exception as the cause
+ }
+ }
+
+ public static UnboundSortOrder fromJson(JsonNode json) {
Preconditions.checkArgument(json.isObject(), "Cannot parse sort order from
non-object: %s", json);
int orderId = JsonUtil.getInt(ORDER_ID, json);
- SortOrder.Builder builder =
SortOrder.builderFor(schema).withOrderId(orderId);
+ UnboundSortOrder.Builder builder =
UnboundSortOrder.builder().withOrderId(orderId);
buildFromJsonFields(builder, json.get(FIELDS));
return builder.build();
}
- private static void buildFromJsonFields(SortOrder.Builder builder, JsonNode
json) {
+ private static void buildFromJsonFields(UnboundSortOrder.Builder builder,
JsonNode json) {
Preconditions.checkArgument(json != null, "Cannot parse null sort order
fields");
Preconditions.checkArgument(json.isArray(), "Cannot parse sort order
fields, not an array: %s", json);
@@ -135,7 +184,7 @@ public class SortOrderParser {
}
private static NullOrder toNullOrder(String nullOrderingAsString) {
- switch (nullOrderingAsString) {
+ switch (nullOrderingAsString.toLowerCase(Locale.ROOT)) {
case "nulls-first":
return NULLS_FIRST;
case "nulls-last":
diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java
b/core/src/main/java/org/apache/iceberg/TableMetadata.java
index 02af0ad..00bb047 100644
--- a/core/src/main/java/org/apache/iceberg/TableMetadata.java
+++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java
@@ -644,16 +644,9 @@ public class TableMetadata implements Serializable {
}
private static SortOrder updateSortOrderSchema(Schema schema, SortOrder
sortOrder) {
- SortOrder.Builder builder =
SortOrder.builderFor(schema).withOrderId(sortOrder.orderId());
-
- // add all the fields to the builder. IDs should not change.
- for (SortField field : sortOrder.fields()) {
- builder.addSortField(field.transform(), field.sourceId(),
field.direction(), field.nullOrder());
- }
-
// build without validation because the schema may have changed in a way
that makes this order invalid. the order
// should still be preserved so that older metadata can be interpreted.
- return builder.buildUnchecked();
+ return sortOrder.toUnbound().bindUnchecked(schema);
}
private static PartitionSpec freshSpec(int specId, Schema schema,
PartitionSpec partitionSpec) {
@@ -674,7 +667,7 @@ public class TableMetadata implements Serializable {
}
private static SortOrder freshSortOrder(int orderId, Schema schema,
SortOrder sortOrder) {
- SortOrder.Builder builder = SortOrder.builderFor(schema);
+ UnboundSortOrder.Builder builder = UnboundSortOrder.builder();
if (sortOrder.isSorted()) {
builder.withOrderId(orderId);
@@ -692,7 +685,7 @@ public class TableMetadata implements Serializable {
field.nullOrder());
}
- return builder.build();
+ return builder.build().bind(schema);
}
private static Map<Long, Snapshot> indexAndValidateSnapshots(List<Snapshot>
snapshots, long lastSequenceNumber) {
@@ -964,6 +957,11 @@ public class TableMetadata implements Serializable {
return this;
}
+ public Builder addSortOrder(UnboundSortOrder order) { // NOTE(review): confirm schemasById always holds currentSchemaId here
+ addSortOrderInternal(order.bind(schemasById.get(currentSchemaId))); // bind to the schema looked up by currentSchemaId
+ return this;
+ }
+
public Builder addSortOrder(SortOrder order) {
addSortOrderInternal(order);
return this;
diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java
b/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java
index 78e2015..737d581 100644
--- a/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java
+++ b/core/src/main/java/org/apache/iceberg/rest/RESTSerializers.java
@@ -33,8 +33,8 @@ import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
-import org.apache.iceberg.SortOrder;
import org.apache.iceberg.SortOrderParser;
+import org.apache.iceberg.UnboundSortOrder;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.catalog.TableIdentifierParser;
@@ -56,8 +56,8 @@ public class RESTSerializers {
.addDeserializer(Schema.class, new SchemaDeserializer())
.addSerializer(PartitionSpec.class, new PartitionSpecSerializer())
.addDeserializer(PartitionSpec.class, new PartitionSpecDeserializer())
- .addSerializer(SortOrder.class, new SortOrderSerializer())
- .addDeserializer(SortOrder.class, new SortOrderDeserializer());
+ .addSerializer(UnboundSortOrder.class, new
UnboundSortOrderSerializer())
+ .addDeserializer(UnboundSortOrder.class, new
UnboundSortOrderDeserializer());
mapper.registerModule(module);
}
@@ -133,20 +133,19 @@ public class RESTSerializers {
}
}
- public static class SortOrderSerializer extends JsonSerializer<SortOrder> {
+ public static class UnboundSortOrderSerializer extends
JsonSerializer<UnboundSortOrder> {
@Override
- public void serialize(SortOrder sortOrder, JsonGenerator gen,
SerializerProvider serializers)
+ public void serialize(UnboundSortOrder sortOrder, JsonGenerator gen,
SerializerProvider serializers)
throws IOException {
SortOrderParser.toJson(sortOrder, gen);
}
}
- public static class SortOrderDeserializer extends
JsonDeserializer<SortOrder> {
+ public static class UnboundSortOrderDeserializer extends
JsonDeserializer<UnboundSortOrder> {
@Override
- public SortOrder deserialize(JsonParser p, DeserializationContext context)
throws IOException {
+ public UnboundSortOrder deserialize(JsonParser p, DeserializationContext
context) throws IOException {
JsonNode jsonNode = p.getCodec().readTree(p);
- Schema schema = (Schema) context.getAttribute("schema");
- return SortOrderParser.fromJson(schema, jsonNode);
+ return SortOrderParser.fromJson(jsonNode);
}
}
}