nastra commented on code in PR #6934: URL: https://github.com/apache/iceberg/pull/6934#discussion_r1172069790
########## core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java: ########## @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import org.apache.iceberg.expressions.ExpressionUtil; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.expressions.ResidualEvaluator; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class TestFileScanTaskParser { + @Test + public void testNullArguments() { + Assertions.assertThatThrownBy(() -> FileScanTaskParser.toJson(null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid file scan task: null"); + + Assertions.assertThatThrownBy(() -> FileScanTaskParser.fromJson(null, true)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Invalid JSON string for file scan task: null"); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testParser(boolean caseSensitive) { + PartitionSpec spec = TableTestBase.SPEC; + FileScanTask fileScanTask = createScanTask(spec, caseSensitive); + String jsonStr = FileScanTaskParser.toJson(fileScanTask); + FileScanTask deserializedTask = FileScanTaskParser.fromJson(jsonStr, caseSensitive); + assertFileScanTaskEquals(fileScanTask, deserializedTask, spec, caseSensitive); + } + + private FileScanTask createScanTask(PartitionSpec spec, boolean caseSensitive) { + ResidualEvaluator residualEvaluator; + if (spec.isUnpartitioned()) { + residualEvaluator = ResidualEvaluator.unpartitioned(Expressions.alwaysTrue()); + } else { + residualEvaluator = ResidualEvaluator.of(spec, Expressions.equal("id", 1), caseSensitive); + } + + return new BaseFileScanTask( + TableTestBase.FILE_A, + new DeleteFile[] {TableTestBase.FILE_A_DELETES, TableTestBase.FILE_A2_DELETES}, + SchemaParser.toJson(TableTestBase.SCHEMA), + PartitionSpecParser.toJson(spec), + residualEvaluator); + } + + private static void assertFileScanTaskEquals( + FileScanTask expected, FileScanTask actual, PartitionSpec spec, boolean caseSensitive) { + TestContentFileParser.assertContentFileEquals(expected.file(), actual.file(), spec); + Assertions.assertThat(actual.deletes()).hasSameSizeAs(expected.deletes()); + for (int pos = 0; pos < expected.deletes().size(); ++pos) { + TestContentFileParser.assertContentFileEquals( + expected.deletes().get(pos), actual.deletes().get(pos), spec); + } + + Assertions.assertThat(expected.schema().sameSchema(actual.schema())) + .isTrue() + .as("Schema should match"); Review Comment: I believe the call to .as() needs to come before .isTrue(), otherwise the description is ignored ########## core/src/main/java/org/apache/iceberg/ContentFileParser.java: ########## @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +class ContentFileParser { + private static final String SPEC_ID = "spec-id"; + private static final String CONTENT = "content"; + private static final String FILE_PATH = "file-path"; + private static final String FILE_FORMAT = "file-format"; + private static final String PARTITION = "partition"; + private static final String RECORD_COUNT = "record-count"; + private static final String FILE_SIZE = "file-size-in-bytes"; + private static final String COLUMN_SIZES = "column-sizes"; + private static final String VALUE_COUNTS = "value-counts"; + private static final String NULL_VALUE_COUNTS = "null-value-counts"; + private static final String NAN_VALUE_COUNTS = "nan-value-counts"; + private static final String LOWER_BOUNDS = "lower-bounds"; + private static final String UPPER_BOUNDS = "upper-bounds"; + private static final String KEY_METADATA = "key-metadata"; + private static final String SPLIT_OFFSETS = "split-offsets"; + private static final String EQUALITY_IDS = "equality-ids"; + private static final String SORT_ORDER_ID = "sort-order-id"; + + private ContentFileParser() {} + + private static boolean hasPartitionData(StructLike partitionData) { + return partitionData != null && partitionData.size() > 0; + } + + static String toJson(ContentFile<?> contentFile, PartitionSpec spec) { + return JsonUtil.generate( + generator -> ContentFileParser.toJson(contentFile, spec, generator), false); + } + + static void toJson(ContentFile<?> contentFile, PartitionSpec spec, JsonGenerator generator) + throws IOException { + Preconditions.checkArgument(contentFile != null, "Invalid content file: null"); + Preconditions.checkArgument(spec != null, "Invalid partition spec: null"); + Preconditions.checkArgument(generator != null, "Invalid JSON generator: null"); + + generator.writeStartObject(); + + // ignore the ordinal position (ContentFile#pos) of the file in a manifest, + // as it isn't used and BaseFile constructor doesn't support it. + + Preconditions.checkArgument( Review Comment: minor: should this check maybe come before calling `generator.writeStartObject()`? ########## core/src/main/java/org/apache/iceberg/ContentFileParser.java: ########## @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +class ContentFileParser { + private static final String SPEC_ID = "spec-id"; + private static final String CONTENT = "content"; + private static final String FILE_PATH = "file-path"; + private static final String FILE_FORMAT = "file-format"; + private static final String PARTITION = "partition"; + private static final String RECORD_COUNT = "record-count"; + private static final String FILE_SIZE = "file-size-in-bytes"; + private static final String COLUMN_SIZES = "column-sizes"; + private static final String VALUE_COUNTS = "value-counts"; + private static final String NULL_VALUE_COUNTS = "null-value-counts"; + private static final String NAN_VALUE_COUNTS = "nan-value-counts"; + private static final String LOWER_BOUNDS = "lower-bounds"; + private static final String UPPER_BOUNDS = "upper-bounds"; + private static final String KEY_METADATA = "key-metadata"; + private static final String SPLIT_OFFSETS = "split-offsets"; + private static final String EQUALITY_IDS = "equality-ids"; + private static final String SORT_ORDER_ID = "sort-order-id"; + + private ContentFileParser() {} + + private static boolean hasPartitionData(StructLike partitionData) { + return partitionData != null && partitionData.size() > 0; + } + + static String toJson(ContentFile<?> contentFile, PartitionSpec spec) { + return JsonUtil.generate( + generator -> ContentFileParser.toJson(contentFile, spec, generator), false); + } + + static void toJson(ContentFile<?> contentFile, PartitionSpec spec, JsonGenerator generator) + throws IOException { + Preconditions.checkArgument(contentFile != null, "Invalid content file: null"); + Preconditions.checkArgument(spec != null, "Invalid partition spec: null"); + Preconditions.checkArgument(generator != null, "Invalid JSON generator: null"); + + generator.writeStartObject(); + + // ignore the ordinal position (ContentFile#pos) of the file in a manifest, + // as it isn't used and BaseFile constructor doesn't support it. + + Preconditions.checkArgument( + contentFile.specId() == spec.specId(), + "Invalid partition spec id from content file: expected = %s, actual = %s", + spec.specId(), + contentFile.specId()); + generator.writeNumberField(SPEC_ID, contentFile.specId()); + Review Comment: nit: line can probably be removed ########## core/src/main/java/org/apache/iceberg/ContentFileParser.java: ########## @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +class ContentFileParser { + private static final String SPEC_ID = "spec-id"; + private static final String CONTENT = "content"; + private static final String FILE_PATH = "file-path"; + private static final String FILE_FORMAT = "file-format"; + private static final String PARTITION = "partition"; + private static final String RECORD_COUNT = "record-count"; + private static final String FILE_SIZE = "file-size-in-bytes"; + private static final String COLUMN_SIZES = "column-sizes"; + private static final String VALUE_COUNTS = "value-counts"; + private static final String NULL_VALUE_COUNTS = "null-value-counts"; + private static final String NAN_VALUE_COUNTS = "nan-value-counts"; + private static final String LOWER_BOUNDS = "lower-bounds"; + private static final String UPPER_BOUNDS = "upper-bounds"; + private static final String KEY_METADATA = "key-metadata"; + private static final String SPLIT_OFFSETS = "split-offsets"; + private static final String EQUALITY_IDS = "equality-ids"; + private static final String SORT_ORDER_ID = "sort-order-id"; + + private ContentFileParser() {} + + private static boolean hasPartitionData(StructLike partitionData) { + return partitionData != null && partitionData.size() > 0; + } + + static String toJson(ContentFile<?> contentFile, PartitionSpec spec) { + return JsonUtil.generate( + generator -> ContentFileParser.toJson(contentFile, spec, generator), false); + } + + static void toJson(ContentFile<?> contentFile, PartitionSpec spec, JsonGenerator generator) + throws IOException { + Preconditions.checkArgument(contentFile != null, "Invalid content file: null"); + Preconditions.checkArgument(spec != null, "Invalid partition spec: null"); + Preconditions.checkArgument(generator != null, "Invalid JSON generator: null"); + + generator.writeStartObject(); + + // ignore the ordinal position (ContentFile#pos) of the file in a manifest, + // as it isn't used and BaseFile constructor doesn't support it. + + Preconditions.checkArgument( + contentFile.specId() == spec.specId(), + "Invalid partition spec id from content file: expected = %s, actual = %s", + spec.specId(), + contentFile.specId()); + generator.writeNumberField(SPEC_ID, contentFile.specId()); + + generator.writeStringField(CONTENT, contentFile.content().name()); + generator.writeStringField(FILE_PATH, contentFile.path().toString()); + generator.writeStringField(FILE_FORMAT, contentFile.format().name()); + + Preconditions.checkArgument( + spec.isPartitioned() == hasPartitionData(contentFile.partition()), + "Invalid partition data from content file: expected = %s, actual = %s", + spec.isPartitioned() ? "partitioned" : "unpartitioned", + hasPartitionData(contentFile.partition()) ? "partitioned" : "unpartitioned"); + if (contentFile.partition() != null) { + generator.writeFieldName(PARTITION); + SingleValueParser.toJson(spec.partitionType(), contentFile.partition(), generator); + } + + generator.writeNumberField(RECORD_COUNT, contentFile.recordCount()); Review Comment: minor: there's a slight inconsistency when writing & reading `RECORD_COUNT`. It is being read inside `metricsFromJson(..)`, but written here (I would have expected it to be written inside `metricsToJson(..)`) ########## core/src/main/java/org/apache/iceberg/ContentFileParser.java: ########## @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +class ContentFileParser { + private static final String SPEC_ID = "spec-id"; + private static final String CONTENT = "content"; + private static final String FILE_PATH = "file-path"; + private static final String FILE_FORMAT = "file-format"; + private static final String PARTITION = "partition"; + private static final String RECORD_COUNT = "record-count"; + private static final String FILE_SIZE = "file-size-in-bytes"; + private static final String COLUMN_SIZES = "column-sizes"; + private static final String VALUE_COUNTS = "value-counts"; + private static final String NULL_VALUE_COUNTS = "null-value-counts"; + private static final String NAN_VALUE_COUNTS = "nan-value-counts"; + private static final String LOWER_BOUNDS = "lower-bounds"; + private static final String UPPER_BOUNDS = "upper-bounds"; + private static final String KEY_METADATA = "key-metadata"; + private static final String SPLIT_OFFSETS = "split-offsets"; + private static final String EQUALITY_IDS = "equality-ids"; + private static final String SORT_ORDER_ID = "sort-order-id"; + + private ContentFileParser() {} + + private static boolean hasPartitionData(StructLike partitionData) { + return partitionData != null && partitionData.size() > 0; + } + + static String toJson(ContentFile<?> contentFile, PartitionSpec spec) { + return JsonUtil.generate( + generator -> ContentFileParser.toJson(contentFile, spec, generator), false); + } + + static void toJson(ContentFile<?> contentFile, PartitionSpec spec, JsonGenerator generator) + throws IOException { + Preconditions.checkArgument(contentFile != null, "Invalid content file: null"); + Preconditions.checkArgument(spec != null, "Invalid partition spec: null"); + Preconditions.checkArgument(generator != null, "Invalid JSON generator: null"); + + generator.writeStartObject(); + + // ignore the ordinal position (ContentFile#pos) of the file in a manifest, + // as it isn't used and BaseFile constructor doesn't support it. + + Preconditions.checkArgument( + contentFile.specId() == spec.specId(), + "Invalid partition spec id from content file: expected = %s, actual = %s", + spec.specId(), + contentFile.specId()); + generator.writeNumberField(SPEC_ID, contentFile.specId()); + + generator.writeStringField(CONTENT, contentFile.content().name()); + generator.writeStringField(FILE_PATH, contentFile.path().toString()); + generator.writeStringField(FILE_FORMAT, contentFile.format().name()); + + Preconditions.checkArgument( + spec.isPartitioned() == hasPartitionData(contentFile.partition()), + "Invalid partition data from content file: expected = %s, actual = %s", + spec.isPartitioned() ? "partitioned" : "unpartitioned", + hasPartitionData(contentFile.partition()) ? "partitioned" : "unpartitioned"); + if (contentFile.partition() != null) { + generator.writeFieldName(PARTITION); + SingleValueParser.toJson(spec.partitionType(), contentFile.partition(), generator); + } + + generator.writeNumberField(RECORD_COUNT, contentFile.recordCount()); + generator.writeNumberField(FILE_SIZE, contentFile.fileSizeInBytes()); + + metricsToJson(contentFile, generator); + + if (contentFile.keyMetadata() != null) { + generator.writeFieldName(KEY_METADATA); + SingleValueParser.toJson(DataFile.KEY_METADATA.type(), contentFile.keyMetadata(), generator); + } + + if (contentFile.splitOffsets() != null) { + generator.writeFieldName(SPLIT_OFFSETS); + SingleValueParser.toJson( Review Comment: when writing all of these fields we go through `SingleValueParser.toJson()` but when reading we go directly through `JsonUtil.getXyz(..)`, which seems a bit inconsistent. I'm wondering if it would be better to use `JsonUtil.writeLongArray(SPLIT_OFFSETS, contentFile.splitOffsets(), generator);` here ########## core/src/main/java/org/apache/iceberg/FileScanTaskParser.java: ########## @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.ExpressionParser; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.expressions.ResidualEvaluator; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.util.JsonUtil; + +public class FileScanTaskParser { + private static final String SCHEMA = "schema"; + private static final String SPEC = "spec"; + private static final String DATA_FILE = "data-file"; + private static final String DELETE_FILES = "delete-files"; + private static final String RESIDUAL = "residual-filter"; + + private FileScanTaskParser() {} + + public static String toJson(FileScanTask fileScanTask) { + return JsonUtil.generate( + generator -> FileScanTaskParser.toJson(fileScanTask, generator), false); + } + + private static void toJson(FileScanTask fileScanTask, JsonGenerator generator) + throws IOException { + Preconditions.checkArgument(fileScanTask != null, "Invalid file scan task: null"); + Preconditions.checkArgument(generator != null, "Invalid JSON generator: null"); + generator.writeStartObject(); + + generator.writeFieldName(SCHEMA); + SchemaParser.toJson(fileScanTask.schema(), generator); + + generator.writeFieldName(SPEC); + PartitionSpec spec = fileScanTask.spec(); + PartitionSpecParser.toJson(spec, generator); + + if (fileScanTask.file() != null) { + generator.writeFieldName(DATA_FILE); + ContentFileParser.toJson(fileScanTask.file(), spec, generator); + } + + if (fileScanTask.deletes() != null) { + generator.writeArrayFieldStart(DELETE_FILES); + for (DeleteFile deleteFile : fileScanTask.deletes()) { + ContentFileParser.toJson(deleteFile, spec, generator); + } + generator.writeEndArray(); + } + + if (fileScanTask.residual() != null) { + generator.writeFieldName(RESIDUAL); + ExpressionParser.toJson(fileScanTask.residual(), generator); + } + + generator.writeEndObject(); + } + + public static FileScanTask fromJson(String json, boolean caseSensitive) { + Preconditions.checkArgument(json != null, "Invalid JSON string for file scan task: null"); + return JsonUtil.parse(json, node -> FileScanTaskParser.fromJsonNode(node, caseSensitive)); + } + + private static FileScanTask fromJsonNode(JsonNode jsonNode, boolean caseSensitive) { Review Comment: nit: I think we can just name this `fromJson()`. ########## core/src/main/java/org/apache/iceberg/FileScanTaskParser.java: ########## @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.ExpressionParser; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.expressions.ResidualEvaluator; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.util.JsonUtil; + +public class FileScanTaskParser { + private static final String SCHEMA = "schema"; + private static final String SPEC = "spec"; + private static final String DATA_FILE = "data-file"; + private static final String DELETE_FILES = "delete-files"; + private static final String RESIDUAL = "residual-filter"; + + private FileScanTaskParser() {} + + public static String toJson(FileScanTask fileScanTask) { + return JsonUtil.generate( + generator -> FileScanTaskParser.toJson(fileScanTask, generator), false); + } + + private static void toJson(FileScanTask fileScanTask, JsonGenerator generator) + throws IOException { + Preconditions.checkArgument(fileScanTask != null, "Invalid file scan task: null"); + Preconditions.checkArgument(generator != null, "Invalid JSON generator: null"); + generator.writeStartObject(); + + generator.writeFieldName(SCHEMA); + SchemaParser.toJson(fileScanTask.schema(), generator); + + generator.writeFieldName(SPEC); + PartitionSpec spec = fileScanTask.spec(); + PartitionSpecParser.toJson(spec, generator); + + if (fileScanTask.file() != null) { + generator.writeFieldName(DATA_FILE); + ContentFileParser.toJson(fileScanTask.file(), spec, generator); + } + + if (fileScanTask.deletes() != null) { + generator.writeArrayFieldStart(DELETE_FILES); + for (DeleteFile deleteFile : fileScanTask.deletes()) { + ContentFileParser.toJson(deleteFile, spec, generator); + } + generator.writeEndArray(); + } + + if (fileScanTask.residual() != null) { + generator.writeFieldName(RESIDUAL); + ExpressionParser.toJson(fileScanTask.residual(), generator); + } + + generator.writeEndObject(); + } + + public static FileScanTask fromJson(String json, boolean caseSensitive) { + Preconditions.checkArgument(json != null, "Invalid JSON string for file scan task: null"); + return JsonUtil.parse(json, node -> FileScanTaskParser.fromJsonNode(node, caseSensitive)); + } + + private static FileScanTask fromJsonNode(JsonNode jsonNode, boolean caseSensitive) { + Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for file scan task: null"); + Preconditions.checkArgument( + jsonNode.isObject(), "Invalid JSON node for file scan task: non-object (%s)", jsonNode); + + JsonNode schemaNode = jsonNode.get(SCHEMA); + Schema schema = SchemaParser.fromJson(schemaNode); + String schemaString = SchemaParser.toJson(schema); + + JsonNode specNode = jsonNode.get(SPEC); Review Comment: same here, it's usually better to go through `JsonUtil.get(...)` to get proper error reporting when this field is missing ########## core/src/main/java/org/apache/iceberg/ContentFileParser.java: ########## @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.JsonUtil; + +class ContentFileParser { + private static final String SPEC_ID = "spec-id"; + private static final String CONTENT = "content"; + private static final String FILE_PATH = "file-path"; + private static final String FILE_FORMAT = "file-format"; + private static final String PARTITION = "partition"; + private static final String RECORD_COUNT = "record-count"; + private static final String FILE_SIZE = "file-size-in-bytes"; + private static final String COLUMN_SIZES = "column-sizes"; + private static final String VALUE_COUNTS = "value-counts"; + private static final String NULL_VALUE_COUNTS = "null-value-counts"; + private static final String NAN_VALUE_COUNTS = "nan-value-counts"; + private static final String LOWER_BOUNDS = "lower-bounds"; + private static final String UPPER_BOUNDS = "upper-bounds"; + private static final String KEY_METADATA = "key-metadata"; + private static final String SPLIT_OFFSETS = "split-offsets"; + private static final String EQUALITY_IDS = "equality-ids"; + private static final String SORT_ORDER_ID = "sort-order-id"; + + private ContentFileParser() {} + + private static boolean hasPartitionData(StructLike partitionData) { + return partitionData != null && partitionData.size() > 0; + } + + static String toJson(ContentFile<?> contentFile, PartitionSpec spec) { + return JsonUtil.generate( + generator -> ContentFileParser.toJson(contentFile, spec, generator), false); + } + + static void toJson(ContentFile<?> contentFile, PartitionSpec spec, JsonGenerator generator) + throws IOException { + Preconditions.checkArgument(contentFile != null, "Invalid content file: null"); + Preconditions.checkArgument(spec != null, "Invalid partition spec: null"); + Preconditions.checkArgument(generator != null, "Invalid JSON generator: null"); + + generator.writeStartObject(); + + // ignore the ordinal position (ContentFile#pos) of the file in a manifest, + // as it isn't used and BaseFile constructor doesn't support it. + + Preconditions.checkArgument( + contentFile.specId() == spec.specId(), + "Invalid partition spec id from content file: expected = %s, actual = %s", + spec.specId(), + contentFile.specId()); + generator.writeNumberField(SPEC_ID, contentFile.specId()); + + generator.writeStringField(CONTENT, contentFile.content().name()); + generator.writeStringField(FILE_PATH, contentFile.path().toString()); + generator.writeStringField(FILE_FORMAT, contentFile.format().name()); + + Preconditions.checkArgument( Review Comment: I think it would be good to move all precondition checks to the top before we call `generator.writeStartObject()`, wdyt? ########## core/src/main/java/org/apache/iceberg/FileScanTaskParser.java: ########## @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonNode; +import java.io.IOException; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.ExpressionParser; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.expressions.ResidualEvaluator; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.util.JsonUtil; + +public class FileScanTaskParser { + private static final String SCHEMA = "schema"; + private static final String SPEC = "spec"; + private static final String DATA_FILE = "data-file"; + private static final String DELETE_FILES = "delete-files"; + private static final String RESIDUAL = "residual-filter"; + + private FileScanTaskParser() {} + + public static String toJson(FileScanTask fileScanTask) { + return JsonUtil.generate( + generator -> FileScanTaskParser.toJson(fileScanTask, generator), false); + } + + private static void toJson(FileScanTask fileScanTask, JsonGenerator generator) + throws IOException { + Preconditions.checkArgument(fileScanTask != null, "Invalid file scan task: null"); + Preconditions.checkArgument(generator != null, "Invalid JSON generator: null"); + generator.writeStartObject(); + + generator.writeFieldName(SCHEMA); + SchemaParser.toJson(fileScanTask.schema(), generator); + + generator.writeFieldName(SPEC); + PartitionSpec spec = fileScanTask.spec(); + PartitionSpecParser.toJson(spec, generator); + + if (fileScanTask.file() != null) { + generator.writeFieldName(DATA_FILE); + ContentFileParser.toJson(fileScanTask.file(), spec, generator); + } + + if (fileScanTask.deletes() != null) { + generator.writeArrayFieldStart(DELETE_FILES); + for (DeleteFile deleteFile : fileScanTask.deletes()) { + ContentFileParser.toJson(deleteFile, spec, generator); + } + generator.writeEndArray(); + } + + if (fileScanTask.residual() != null) { + generator.writeFieldName(RESIDUAL); + ExpressionParser.toJson(fileScanTask.residual(), generator); + } + + generator.writeEndObject(); + } + + public static FileScanTask fromJson(String json, boolean caseSensitive) { + Preconditions.checkArgument(json != null, "Invalid JSON string for file scan task: null"); + return JsonUtil.parse(json, node -> FileScanTaskParser.fromJsonNode(node, caseSensitive)); + } + + private static FileScanTask fromJsonNode(JsonNode jsonNode, boolean caseSensitive) { + Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for file scan task: null"); + Preconditions.checkArgument( + jsonNode.isObject(), "Invalid JSON node for file scan task: non-object (%s)", jsonNode); + + JsonNode schemaNode = jsonNode.get(SCHEMA); Review Comment: probably better to change this to `JsonUtil.get(SCHEMA, jsonNode)` to have proper error reporting. Also this can then be inlined with the next line -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
