This is an automated email from the ASF dual-hosted git repository.
stevenwu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 3cd2c528a8 Core: add JSON serialization for
BaseFilesTable.ManifestReadTask, AllManifestsTable.ManifestListReadTask, and
BaseEntriesTable.ManifestReadTask (#10735)
3cd2c528a8 is described below
commit 3cd2c528a83e14e6d50ca7c9c01b00ea53e5276e
Author: Steven Zhen Wu <[email protected]>
AuthorDate: Thu Aug 15 09:40:33 2024 -0700
Core: add JSON serialization for BaseFilesTable.ManifestReadTask,
AllManifestsTable.ManifestListReadTask, and BaseEntriesTable.ManifestReadTask
(#10735)
---
.palantir/revapi.yml | 15 +-
.../java/org/apache/iceberg/AllManifestsTable.java | 41 +++-
.../iceberg/AllManifestsTableTaskParser.java | 107 ++++++++
.../java/org/apache/iceberg/BaseEntriesTable.java | 52 ++--
.../java/org/apache/iceberg/BaseFilesTable.java | 51 ++--
.../org/apache/iceberg/FilesTableTaskParser.java | 99 ++++++++
.../org/apache/iceberg/GenericManifestFile.java | 36 +++
.../iceberg/ManifestEntriesTableTaskParser.java | 98 ++++++++
.../org/apache/iceberg/ManifestFileParser.java | 271 +++++++++++++++++++++
.../java/org/apache/iceberg/ScanTaskParser.java | 28 ++-
.../java/org/apache/iceberg/TableMetadata.java | 13 +-
.../java/org/apache/iceberg/io/FileIOParser.java | 4 +-
.../org/apache/iceberg/util/PartitionUtil.java | 10 +
.../iceberg/TestAllManifestsTableTaskParser.java | 152 ++++++++++++
.../apache/iceberg/TestFilesTableTaskParser.java | 132 ++++++++++
.../org/apache/iceberg/TestManifestFileParser.java | 111 +++++++++
16 files changed, 1167 insertions(+), 53 deletions(-)
diff --git a/.palantir/revapi.yml b/.palantir/revapi.yml
index e58ce70ded..9b8017f0be 100644
--- a/.palantir/revapi.yml
+++ b/.palantir/revapi.yml
@@ -1025,14 +1025,11 @@ acceptedBreaks:
new: "class org.apache.iceberg.types.Types.NestedField"
justification: "new Constructor added"
org.apache.iceberg:iceberg-core:
- - code: "java.method.visibilityReduced"
- old: "method void
org.apache.iceberg.encryption.PlaintextEncryptionManager::<init>()"
- new: "method void
org.apache.iceberg.encryption.PlaintextEncryptionManager::<init>()"
- justification: "Deprecations for 1.6.0 release"
- code: "java.element.noLongerDeprecated"
old: "method void
org.apache.iceberg.encryption.PlaintextEncryptionManager::<init>()"
new: "method void
org.apache.iceberg.encryption.PlaintextEncryptionManager::<init>()"
- justification: "Constructor became private as part of deprecations
cleanup for 1.6.0 release"
+ justification: "Constructor became private as part of deprecations
cleanup for\
+ \ 1.6.0 release"
- code: "java.element.noLongerDeprecated"
old: "method void
org.apache.iceberg.rest.auth.OAuth2Util.AuthSession::<init>(java.util.Map<java.lang.String,\
\ java.lang.String>, java.lang.String, java.lang.String,
java.lang.String,\
@@ -1056,6 +1053,10 @@ acceptedBreaks:
- code: "java.method.removed"
old: "method org.apache.iceberg.DataFiles.Builder
org.apache.iceberg.DataFiles.Builder::withEqualityFieldIds(java.util.List<java.lang.Integer>)"
justification: "Deprecations for 1.6.0 release"
+ - code: "java.method.visibilityReduced"
+ old: "method void
org.apache.iceberg.encryption.PlaintextEncryptionManager::<init>()"
+ new: "method void
org.apache.iceberg.encryption.PlaintextEncryptionManager::<init>()"
+ justification: "Deprecations for 1.6.0 release"
"1.6.0":
org.apache.iceberg:iceberg-common:
- code: "java.method.removed"
@@ -1083,6 +1084,10 @@ acceptedBreaks:
\ java.lang.Object[]) throws java.lang.Exception"
justification: "Reduced visibility and scoped to package"
org.apache.iceberg:iceberg-core:
+ - code: "java.class.defaultSerializationChanged"
+ old: "class org.apache.iceberg.GenericManifestFile"
+ new: "class org.apache.iceberg.GenericManifestFile"
+ justification: "Serialization across versions is not supported"
- code: "java.class.removed"
old: "enum org.apache.iceberg.BaseMetastoreTableOperations.CommitStatus"
justification: "Removing deprecated code"
diff --git a/core/src/main/java/org/apache/iceberg/AllManifestsTable.java
b/core/src/main/java/org/apache/iceberg/AllManifestsTable.java
index a9c6f50e0b..641a7a3c9a 100644
--- a/core/src/main/java/org/apache/iceberg/AllManifestsTable.java
+++ b/core/src/main/java/org/apache/iceberg/AllManifestsTable.java
@@ -34,6 +34,7 @@ import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.FileIO;
+import
org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
@@ -52,7 +53,8 @@ public class AllManifestsTable extends BaseMetadataTable {
public static final Types.NestedField REF_SNAPSHOT_ID =
Types.NestedField.required(18, "reference_snapshot_id",
Types.LongType.get());
- private static final Schema MANIFEST_FILE_SCHEMA =
+ @VisibleForTesting
+ static final Schema MANIFEST_FILE_SCHEMA =
new Schema(
Types.NestedField.required(14, "content", Types.IntegerType.get()),
Types.NestedField.required(1, "path", Types.StringType.get()),
@@ -119,6 +121,7 @@ public class AllManifestsTable extends BaseMetadataTable {
protected CloseableIterable<FileScanTask> doPlanFiles() {
FileIO io = table().io();
Map<Integer, PartitionSpec> specs = Maps.newHashMap(table().specs());
+ Schema dataTableSchema = table().schema();
Expression filter = shouldIgnoreResiduals() ? Expressions.alwaysTrue() :
filter();
SnapshotEvaluator snapshotEvaluator =
@@ -132,7 +135,13 @@ public class AllManifestsTable extends BaseMetadataTable {
snap -> {
if (snap.manifestListLocation() != null) {
return new ManifestListReadTask(
- io, schema(), specs, snap.manifestListLocation(),
filter, snap.snapshotId());
+ dataTableSchema,
+ io,
+ schema(),
+ specs,
+ snap.manifestListLocation(),
+ filter,
+ snap.snapshotId());
} else {
return StaticDataTask.of(
io.newInputFile(
@@ -149,6 +158,7 @@ public class AllManifestsTable extends BaseMetadataTable {
}
static class ManifestListReadTask implements DataTask {
+ private final Schema dataTableSchema;
private final FileIO io;
private final Schema schema;
private final Map<Integer, PartitionSpec> specs;
@@ -158,12 +168,14 @@ public class AllManifestsTable extends BaseMetadataTable {
private DataFile lazyDataFile = null;
ManifestListReadTask(
+ Schema dataTableSchema,
FileIO io,
Schema schema,
Map<Integer, PartitionSpec> specs,
String manifestListLocation,
Expression residual,
long referenceSnapshotId) {
+ this.dataTableSchema = dataTableSchema;
this.io = io;
this.schema = schema;
this.specs = specs;
@@ -244,6 +256,31 @@ public class AllManifestsTable extends BaseMetadataTable {
public Iterable<FileScanTask> split(long splitSize) {
return ImmutableList.of(this); // don't split
}
+
+ @Override
+ public Schema schema() {
+ return schema;
+ }
+
+ Schema dataTableSchema() {
+ return dataTableSchema;
+ }
+
+ FileIO io() {
+ return io;
+ }
+
+ Map<Integer, PartitionSpec> specsById() {
+ return specs;
+ }
+
+ String manifestListLocation() {
+ return manifestListLocation;
+ }
+
+ long referenceSnapshotId() {
+ return referenceSnapshotId;
+ }
}
static StaticDataTask.Row manifestFileToRow(
diff --git
a/core/src/main/java/org/apache/iceberg/AllManifestsTableTaskParser.java
b/core/src/main/java/org/apache/iceberg/AllManifestsTableTaskParser.java
new file mode 100644
index 0000000000..24c28d9546
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/AllManifestsTableTaskParser.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
+import java.util.Map;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.ExpressionParser;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.io.FileIOParser;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.util.JsonUtil;
+import org.apache.iceberg.util.PartitionUtil;
+
+class AllManifestsTableTaskParser {
+ private static final String DATA_TABLE_SCHEMA = "data-table-schema";
+ private static final String FILE_IO = "file-io";
+ private static final String SCHEMA = "schema";
+ private static final String SPECS = "partition-specs";
+  private static final String MANIFEST_LIST_LOCATION = "manifest-list-location";
+ private static final String RESIDUAL = "residual-filter";
+ private static final String REFERENCE_SNAPSHOT_ID = "reference-snapshot-id";
+
+ private AllManifestsTableTaskParser() {}
+
+ static void toJson(AllManifestsTable.ManifestListReadTask task,
JsonGenerator generator)
+ throws IOException {
+ Preconditions.checkArgument(task != null, "Invalid manifest task: null");
+ Preconditions.checkArgument(generator != null, "Invalid JSON generator:
null");
+
+ generator.writeFieldName(DATA_TABLE_SCHEMA);
+ SchemaParser.toJson(task.dataTableSchema(), generator);
+
+ generator.writeFieldName(FILE_IO);
+ FileIOParser.toJson(task.io(), generator);
+
+ generator.writeFieldName(SCHEMA);
+ SchemaParser.toJson(task.schema(), generator);
+
+ generator.writeArrayFieldStart(SPECS);
+ for (PartitionSpec spec : task.specsById().values()) {
+ PartitionSpecParser.toJson(spec, generator);
+ }
+
+ generator.writeEndArray();
+
+ generator.writeStringField(MANIFEST_LIST_LOCATION,
task.manifestListLocation());
+
+ generator.writeFieldName(RESIDUAL);
+ ExpressionParser.toJson(task.residual(), generator);
+
+ generator.writeNumberField(REFERENCE_SNAPSHOT_ID,
task.referenceSnapshotId());
+ }
+
+ static AllManifestsTable.ManifestListReadTask fromJson(JsonNode jsonNode) {
+ Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for
manifest task: null");
+ Preconditions.checkArgument(
+ jsonNode.isObject(), "Invalid JSON node for manifest task: non-object
(%s)", jsonNode);
+
+ Schema dataTableSchema =
SchemaParser.fromJson(JsonUtil.get(DATA_TABLE_SCHEMA, jsonNode));
+ FileIO fileIO = FileIOParser.fromJson(JsonUtil.get(FILE_IO, jsonNode),
null);
+ Schema schema = SchemaParser.fromJson(JsonUtil.get(SCHEMA, jsonNode));
+
+ JsonNode specsArray = JsonUtil.get(SPECS, jsonNode);
+ Preconditions.checkArgument(
+ specsArray.isArray(), "Invalid JSON node for partition specs:
non-array (%s)", specsArray);
+
+ ImmutableList.Builder<PartitionSpec> specsBuilder =
ImmutableList.builder();
+ for (JsonNode specNode : specsArray) {
+ PartitionSpec spec = PartitionSpecParser.fromJson(dataTableSchema,
specNode);
+ specsBuilder.add(spec);
+ }
+
+ Map<Integer, PartitionSpec> specsById =
PartitionUtil.indexSpecs(specsBuilder.build());
+ String manifestListLocation = JsonUtil.getString(MANIFEST_LIST_LOCATION,
jsonNode);
+ Expression residualFilter =
ExpressionParser.fromJson(JsonUtil.get(RESIDUAL, jsonNode));
+ long referenceSnapshotId = JsonUtil.getLong(REFERENCE_SNAPSHOT_ID,
jsonNode);
+
+ return new AllManifestsTable.ManifestListReadTask(
+ dataTableSchema,
+ fileIO,
+ schema,
+ specsById,
+ manifestListLocation,
+ residualFilter,
+ referenceSnapshotId);
+ }
+}
diff --git a/core/src/main/java/org/apache/iceberg/BaseEntriesTable.java
b/core/src/main/java/org/apache/iceberg/BaseEntriesTable.java
index 526bb42ea6..ae77bff703 100644
--- a/core/src/main/java/org/apache/iceberg/BaseEntriesTable.java
+++ b/core/src/main/java/org/apache/iceberg/BaseEntriesTable.java
@@ -33,7 +33,6 @@ import org.apache.iceberg.expressions.ManifestEvaluator;
import org.apache.iceberg.expressions.ResidualEvaluator;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.FileIO;
-import
org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
@@ -92,15 +91,9 @@ abstract class BaseEntriesTable extends BaseMetadataTable {
evalCache.get(manifest.partitionSpecId()).eval(manifest)
&& manifestContentEvaluator.eval(manifest));
- String schemaString = SchemaParser.toJson(projectedSchema);
- String specString =
PartitionSpecParser.toJson(PartitionSpec.unpartitioned());
- ResidualEvaluator residuals = ResidualEvaluator.unpartitioned(filter);
-
return CloseableIterable.transform(
filteredManifests,
- manifest ->
- new ManifestReadTask(
- table, manifest, projectedSchema, schemaString, specString,
residuals));
+ manifest -> new ManifestReadTask(table, manifest, projectedSchema,
filter));
}
/**
@@ -283,19 +276,29 @@ abstract class BaseEntriesTable extends BaseMetadataTable
{
private final ManifestFile manifest;
private final Map<Integer, PartitionSpec> specsById;
+ private ManifestReadTask(
+ Table table, ManifestFile manifest, Schema projection, Expression
filter) {
+ this(table.schema(), table.io(), table.specs(), manifest, projection,
filter);
+ }
+
ManifestReadTask(
- Table table,
+ Schema dataTableSchema,
+ FileIO io,
+ Map<Integer, PartitionSpec> specsById,
ManifestFile manifest,
Schema projection,
- String schemaString,
- String specString,
- ResidualEvaluator residuals) {
- super(DataFiles.fromManifest(manifest), null, schemaString, specString,
residuals);
+ Expression filter) {
+ super(
+ DataFiles.fromManifest(manifest),
+ null,
+ SchemaParser.toJson(projection),
+ PartitionSpecParser.toJson(PartitionSpec.unpartitioned()),
+ ResidualEvaluator.unpartitioned(filter));
this.projection = projection;
- this.io = table.io();
+ this.io = io;
this.manifest = manifest;
- this.specsById = Maps.newHashMap(table.specs());
- this.dataTableSchema = table.schema();
+ this.specsById = Maps.newHashMap(specsById);
+ this.dataTableSchema = dataTableSchema;
Type fileProjectionType = projection.findType("data_file");
this.fileProjection =
@@ -311,7 +314,6 @@ abstract class BaseEntriesTable extends BaseMetadataTable {
+ (long) manifest.existingFilesCount();
}
- @VisibleForTesting
ManifestFile manifest() {
return manifest;
}
@@ -403,5 +405,21 @@ abstract class BaseEntriesTable extends BaseMetadataTable {
public Iterable<FileScanTask> split(long splitSize) {
return ImmutableList.of(this); // don't split
}
+
+ FileIO io() {
+ return io;
+ }
+
+ Map<Integer, PartitionSpec> specsById() {
+ return specsById;
+ }
+
+ Schema dataTableSchema() {
+ return dataTableSchema;
+ }
+
+ Schema projection() {
+ return projection;
+ }
}
}
diff --git a/core/src/main/java/org/apache/iceberg/BaseFilesTable.java
b/core/src/main/java/org/apache/iceberg/BaseFilesTable.java
index 149edf9500..b71744f739 100644
--- a/core/src/main/java/org/apache/iceberg/BaseFilesTable.java
+++ b/core/src/main/java/org/apache/iceberg/BaseFilesTable.java
@@ -29,7 +29,6 @@ import org.apache.iceberg.expressions.ManifestEvaluator;
import org.apache.iceberg.expressions.ResidualEvaluator;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.FileIO;
-import
org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
@@ -80,16 +79,11 @@ abstract class BaseFilesTable extends BaseMetadataTable {
CloseableIterable.filter(
manifests, manifest ->
evalCache.get(manifest.partitionSpecId()).eval(manifest));
- String schemaString = SchemaParser.toJson(projectedSchema);
- String specString =
PartitionSpecParser.toJson(PartitionSpec.unpartitioned());
Expression filter = ignoreResiduals ? Expressions.alwaysTrue() : rowFilter;
- ResidualEvaluator residuals = ResidualEvaluator.unpartitioned(filter);
return CloseableIterable.transform(
filteredManifests,
- manifest ->
- new ManifestReadTask(
- table, manifest, projectedSchema, schemaString, specString,
residuals));
+ manifest -> new ManifestReadTask(table, manifest, projectedSchema,
filter));
}
abstract static class BaseFilesTableScan extends BaseMetadataTableScan {
@@ -140,18 +134,28 @@ abstract class BaseFilesTable extends BaseMetadataTable {
private final Schema dataTableSchema;
private final Schema projection;
+ private ManifestReadTask(
+ Table table, ManifestFile manifest, Schema projection, Expression
filter) {
+ this(table.schema(), table.io(), table.specs(), manifest, projection,
filter);
+ }
+
ManifestReadTask(
- Table table,
+ Schema dataTableSchema,
+ FileIO io,
+ Map<Integer, PartitionSpec> specsById,
ManifestFile manifest,
Schema projection,
- String schemaString,
- String specString,
- ResidualEvaluator residuals) {
- super(DataFiles.fromManifest(manifest), null, schemaString, specString,
residuals);
- this.io = table.io();
- this.specsById = Maps.newHashMap(table.specs());
+ Expression filter) {
+ super(
+ DataFiles.fromManifest(manifest),
+ null,
+ SchemaParser.toJson(projection),
+ PartitionSpecParser.toJson(PartitionSpec.unpartitioned()),
+ ResidualEvaluator.unpartitioned(filter));
+ this.io = io;
+ this.specsById = Maps.newHashMap(specsById);
this.manifest = manifest;
- this.dataTableSchema = table.schema();
+ this.dataTableSchema = dataTableSchema;
this.projection = projection;
}
@@ -240,9 +244,24 @@ abstract class BaseFilesTable extends BaseMetadataTable {
return ImmutableList.of(this); // don't split
}
- @VisibleForTesting
+ FileIO io() {
+ return io;
+ }
+
+ Map<Integer, PartitionSpec> specsById() {
+ return specsById;
+ }
+
ManifestFile manifest() {
return manifest;
}
+
+ Schema dataTableSchema() {
+ return dataTableSchema;
+ }
+
+ Schema projection() {
+ return projection;
+ }
}
}
diff --git a/core/src/main/java/org/apache/iceberg/FilesTableTaskParser.java
b/core/src/main/java/org/apache/iceberg/FilesTableTaskParser.java
new file mode 100644
index 0000000000..365deacebf
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/FilesTableTaskParser.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
+import java.util.Map;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.ExpressionParser;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.io.FileIOParser;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.util.JsonUtil;
+import org.apache.iceberg.util.PartitionUtil;
+
+class FilesTableTaskParser {
+ private static final String FILE_IO = "file-io";
+ private static final String SPECS = "partition-specs";
+ private static final String SCHEMA = "schema";
+ private static final String PROJECTION = "projection";
+ private static final String RESIDUAL = "residual-filter";
+ private static final String MANIFEST = "manifest-file";
+
+ private FilesTableTaskParser() {}
+
+ static void toJson(BaseFilesTable.ManifestReadTask task, JsonGenerator
generator)
+ throws IOException {
+ Preconditions.checkArgument(task != null, "Invalid files task: null");
+ Preconditions.checkArgument(generator != null, "Invalid JSON generator:
null");
+
+ generator.writeFieldName(SCHEMA);
+ SchemaParser.toJson(task.dataTableSchema(), generator);
+
+ generator.writeFieldName(PROJECTION);
+ SchemaParser.toJson(task.projection(), generator);
+
+ generator.writeFieldName(FILE_IO);
+ FileIOParser.toJson(task.io(), generator);
+
+ generator.writeArrayFieldStart(SPECS);
+ for (PartitionSpec spec : task.specsById().values()) {
+ PartitionSpecParser.toJson(spec, generator);
+ }
+
+ generator.writeEndArray();
+
+ generator.writeFieldName(RESIDUAL);
+ ExpressionParser.toJson(task.residual(), generator);
+
+ generator.writeFieldName(MANIFEST);
+ ManifestFileParser.toJson(task.manifest(), generator);
+ }
+
+ static BaseFilesTable.ManifestReadTask fromJson(JsonNode jsonNode) {
+ Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for files
task: null");
+ Preconditions.checkArgument(
+ jsonNode.isObject(), "Invalid JSON node for files task: non-object
(%s)", jsonNode);
+
+ Schema dataTableSchema = SchemaParser.fromJson(JsonUtil.get(SCHEMA,
jsonNode));
+ Schema projection = SchemaParser.fromJson(JsonUtil.get(PROJECTION,
jsonNode));
+
+ FileIO fileIO = FileIOParser.fromJson(JsonUtil.get(FILE_IO, jsonNode),
null);
+
+ JsonNode specsArray = JsonUtil.get(SPECS, jsonNode);
+ Preconditions.checkArgument(
+ specsArray.isArray(), "Invalid JSON node for partition specs:
non-array (%s)", specsArray);
+
+ ImmutableList.Builder<PartitionSpec> specsBuilder =
ImmutableList.builder();
+ for (JsonNode specNode : specsArray) {
+ PartitionSpec spec = PartitionSpecParser.fromJson(dataTableSchema,
specNode);
+ specsBuilder.add(spec);
+ }
+
+ Map<Integer, PartitionSpec> specsById =
PartitionUtil.indexSpecs(specsBuilder.build());
+ Expression residualFilter =
ExpressionParser.fromJson(JsonUtil.get(RESIDUAL, jsonNode));
+ ManifestFile manifestFile =
ManifestFileParser.fromJson(JsonUtil.get(MANIFEST, jsonNode));
+
+ return new BaseFilesTable.ManifestReadTask(
+ dataTableSchema, fileIO, specsById, manifestFile, projection,
residualFilter);
+ }
+}
diff --git a/core/src/main/java/org/apache/iceberg/GenericManifestFile.java
b/core/src/main/java/org/apache/iceberg/GenericManifestFile.java
index ed94da5e17..d081e0bdd5 100644
--- a/core/src/main/java/org/apache/iceberg/GenericManifestFile.java
+++ b/core/src/main/java/org/apache/iceberg/GenericManifestFile.java
@@ -105,6 +105,42 @@ public class GenericManifestFile
this.keyMetadata = null;
}
+ /** Adjust the arg order to avoid conflict with the public constructor below
*/
+ GenericManifestFile(
+ String path,
+ long length,
+ int specId,
+ ManifestContent content,
+ long sequenceNumber,
+ long minSequenceNumber,
+ Long snapshotId,
+ List<PartitionFieldSummary> partitions,
+ ByteBuffer keyMetadata,
+ Integer addedFilesCount,
+ Long addedRowsCount,
+ Integer existingFilesCount,
+ Long existingRowsCount,
+ Integer deletedFilesCount,
+ Long deletedRowsCount) {
+ this.avroSchema = AVRO_SCHEMA;
+ this.manifestPath = path;
+ this.length = length;
+ this.specId = specId;
+ this.content = content;
+ this.sequenceNumber = sequenceNumber;
+ this.minSequenceNumber = minSequenceNumber;
+ this.snapshotId = snapshotId;
+ this.addedFilesCount = addedFilesCount;
+ this.addedRowsCount = addedRowsCount;
+ this.existingFilesCount = existingFilesCount;
+ this.existingRowsCount = existingRowsCount;
+ this.deletedFilesCount = deletedFilesCount;
+ this.deletedRowsCount = deletedRowsCount;
+ this.partitions = partitions == null ? null : partitions.toArray(new
PartitionFieldSummary[0]);
+ this.fromProjectionPos = null;
+ this.keyMetadata = ByteBuffers.toByteArray(keyMetadata);
+ }
+
public GenericManifestFile(
String path,
long length,
diff --git
a/core/src/main/java/org/apache/iceberg/ManifestEntriesTableTaskParser.java
b/core/src/main/java/org/apache/iceberg/ManifestEntriesTableTaskParser.java
new file mode 100644
index 0000000000..37f84d09ae
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/ManifestEntriesTableTaskParser.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
+import java.util.Map;
+import org.apache.iceberg.expressions.Expression;
+import org.apache.iceberg.expressions.ExpressionParser;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.io.FileIOParser;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.util.JsonUtil;
+import org.apache.iceberg.util.PartitionUtil;
+
+class ManifestEntriesTableTaskParser {
+ private static final String FILE_IO = "file-io";
+ private static final String SPECS = "partition-specs";
+ private static final String SCHEMA = "schema";
+ private static final String PROJECTION = "projection";
+ private static final String RESIDUAL = "residual-filter";
+ private static final String MANIFEST = "manifest-file";
+
+ private ManifestEntriesTableTaskParser() {}
+
+ static void toJson(BaseEntriesTable.ManifestReadTask task, JsonGenerator
generator)
+ throws IOException {
+ Preconditions.checkArgument(task != null, "Invalid manifest task: null");
+ Preconditions.checkArgument(generator != null, "Invalid JSON generator:
null");
+
+ generator.writeFieldName(SCHEMA);
+ SchemaParser.toJson(task.dataTableSchema(), generator);
+
+ generator.writeFieldName(FILE_IO);
+ FileIOParser.toJson(task.io(), generator);
+
+ generator.writeArrayFieldStart(SPECS);
+ for (PartitionSpec spec : task.specsById().values()) {
+ PartitionSpecParser.toJson(spec, generator);
+ }
+
+ generator.writeEndArray();
+
+ generator.writeFieldName(MANIFEST);
+ ManifestFileParser.toJson(task.manifest(), generator);
+
+ generator.writeFieldName(PROJECTION);
+ SchemaParser.toJson(task.projection(), generator);
+
+ generator.writeFieldName(RESIDUAL);
+ ExpressionParser.toJson(task.residual(), generator);
+ }
+
+ static BaseEntriesTable.ManifestReadTask fromJson(JsonNode jsonNode) {
+ Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for
manifest task: null");
+ Preconditions.checkArgument(
+ jsonNode.isObject(), "Invalid JSON node for manifest task: non-object
(%s)", jsonNode);
+
+ Schema dataTableSchema = SchemaParser.fromJson(JsonUtil.get(SCHEMA,
jsonNode));
+ FileIO fileIO = FileIOParser.fromJson(JsonUtil.get(FILE_IO, jsonNode),
null);
+
+ JsonNode specsArray = JsonUtil.get(SPECS, jsonNode);
+ Preconditions.checkArgument(
+ specsArray.isArray(), "Invalid JSON node for partition specs:
non-array (%s)", specsArray);
+ ImmutableList.Builder<PartitionSpec> specsBuilder =
ImmutableList.builder();
+ for (JsonNode specNode : specsArray) {
+ PartitionSpec spec = PartitionSpecParser.fromJson(dataTableSchema,
specNode);
+ specsBuilder.add(spec);
+ }
+
+ Map<Integer, PartitionSpec> specsById =
PartitionUtil.indexSpecs(specsBuilder.build());
+
+ ManifestFile manifestFile =
ManifestFileParser.fromJson(JsonUtil.get(MANIFEST, jsonNode));
+ Schema projection = SchemaParser.fromJson(JsonUtil.get(PROJECTION,
jsonNode));
+ Expression residualFilter =
ExpressionParser.fromJson(JsonUtil.get(RESIDUAL, jsonNode));
+
+ return new BaseEntriesTable.ManifestReadTask(
+ dataTableSchema, fileIO, specsById, manifestFile, projection,
residualFilter);
+ }
+}
diff --git a/core/src/main/java/org/apache/iceberg/ManifestFileParser.java
b/core/src/main/java/org/apache/iceberg/ManifestFileParser.java
new file mode 100644
index 0000000000..80f1a0fcf7
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/ManifestFileParser.java
@@ -0,0 +1,271 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.JsonUtil;
+
+class ManifestFileParser {
+ private static final String PATH = "path";
+ private static final String LENGTH = "length";
+ private static final String SPEC_ID = "partition-spec-id";
+ private static final String CONTENT = "content";
+ private static final String SEQUENCE_NUMBER = "sequence-number";
+ private static final String MIN_SEQUENCE_NUMBER = "min-sequence-number";
+ private static final String ADDED_SNAPSHOT_ID = "added-snapshot-id";
+ private static final String ADDED_FILES_COUNT = "added-files-count";
+ private static final String EXISTING_FILES_COUNT = "existing-files-count";
+ private static final String DELETED_FILES_COUNT = "deleted-files-count";
+ private static final String ADDED_ROWS_COUNT = "added-rows-count";
+ private static final String EXISTING_ROWS_COUNT = "existing-rows-count";
+ private static final String DELETED_ROWS_COUNT = "deleted-rows-count";
+ private static final String PARTITION_FIELD_SUMMARY =
"partition-field-summary";
+ private static final String KEY_METADATA = "key-metadata";
+
+ private ManifestFileParser() {}
+
+ static void toJson(ManifestFile manifestFile, JsonGenerator generator)
throws IOException {
+ Preconditions.checkArgument(manifestFile != null, "Invalid manifest file:
null");
+ Preconditions.checkArgument(generator != null, "Invalid JSON generator:
null");
+
+ generator.writeStartObject();
+
+ generator.writeStringField(PATH, manifestFile.path());
+ generator.writeNumberField(LENGTH, manifestFile.length());
+ generator.writeNumberField(SPEC_ID, manifestFile.partitionSpecId());
+
+ if (manifestFile.content() != null) {
+ generator.writeNumberField(CONTENT, manifestFile.content().id());
+ }
+
+ generator.writeNumberField(SEQUENCE_NUMBER, manifestFile.sequenceNumber());
+ generator.writeNumberField(MIN_SEQUENCE_NUMBER,
manifestFile.minSequenceNumber());
+
+ if (manifestFile.snapshotId() != null) {
+ generator.writeNumberField(ADDED_SNAPSHOT_ID, manifestFile.snapshotId());
+ }
+
+ if (manifestFile.addedFilesCount() != null) {
+ generator.writeNumberField(ADDED_FILES_COUNT,
manifestFile.addedFilesCount());
+ }
+
+ if (manifestFile.existingFilesCount() != null) {
+ generator.writeNumberField(EXISTING_FILES_COUNT,
manifestFile.existingFilesCount());
+ }
+
+ if (manifestFile.deletedFilesCount() != null) {
+ generator.writeNumberField(DELETED_FILES_COUNT,
manifestFile.deletedFilesCount());
+ }
+
+ if (manifestFile.addedRowsCount() != null) {
+ generator.writeNumberField(ADDED_ROWS_COUNT,
manifestFile.addedRowsCount());
+ }
+
+ if (manifestFile.existingRowsCount() != null) {
+ generator.writeNumberField(EXISTING_ROWS_COUNT,
manifestFile.existingRowsCount());
+ }
+
+ if (manifestFile.deletedRowsCount() != null) {
+ generator.writeNumberField(DELETED_ROWS_COUNT,
manifestFile.deletedRowsCount());
+ }
+
+ if (manifestFile.partitions() != null) {
+ generator.writeArrayFieldStart(PARTITION_FIELD_SUMMARY);
+ for (ManifestFile.PartitionFieldSummary summary :
manifestFile.partitions()) {
+ PartitionFieldSummaryParser.toJson(summary, generator);
+ }
+
+ generator.writeEndArray();
+ }
+
+ if (manifestFile.keyMetadata() != null) {
+ generator.writeFieldName(KEY_METADATA);
+ SingleValueParser.toJson(DataFile.KEY_METADATA.type(),
manifestFile.keyMetadata(), generator);
+ }
+
+ generator.writeEndObject();
+ }
+
+ static ManifestFile fromJson(JsonNode jsonNode) {
+ Preconditions.checkArgument(jsonNode != null, "Invalid JSON node for
manifest file: null");
+ Preconditions.checkArgument(
+ jsonNode.isObject(), "Invalid JSON node for manifest file: non-object
(%s)", jsonNode);
+
+ String path = JsonUtil.getString(PATH, jsonNode);
+ long length = JsonUtil.getLong(LENGTH, jsonNode);
+ int specId = JsonUtil.getInt(SPEC_ID, jsonNode);
+
+ ManifestContent manifestContent = null;
+ if (jsonNode.has(CONTENT)) {
+ manifestContent = ManifestContent.fromId(JsonUtil.getInt(CONTENT,
jsonNode));
+ }
+
+ long sequenceNumber = JsonUtil.getLong(SEQUENCE_NUMBER, jsonNode);
+ long minSequenceNumber = JsonUtil.getLong(MIN_SEQUENCE_NUMBER, jsonNode);
+
+ Long addedSnapshotId = null;
+ if (jsonNode.has(ADDED_SNAPSHOT_ID)) {
+ addedSnapshotId = JsonUtil.getLong(ADDED_SNAPSHOT_ID, jsonNode);
+ }
+
+ Integer addedFilesCount = null;
+ if (jsonNode.has(ADDED_FILES_COUNT)) {
+ addedFilesCount = JsonUtil.getInt(ADDED_FILES_COUNT, jsonNode);
+ }
+
+ Integer existingFilesCount = null;
+ if (jsonNode.has(EXISTING_FILES_COUNT)) {
+ existingFilesCount = JsonUtil.getInt(EXISTING_FILES_COUNT, jsonNode);
+ }
+
+ Integer deletedFilesCount = null;
+ if (jsonNode.has(DELETED_FILES_COUNT)) {
+ deletedFilesCount = JsonUtil.getInt(DELETED_FILES_COUNT, jsonNode);
+ }
+
+ Long addedRowsCount = null;
+ if (jsonNode.has(ADDED_ROWS_COUNT)) {
+ addedRowsCount = JsonUtil.getLong(ADDED_ROWS_COUNT, jsonNode);
+ }
+
+ Long existingRowsCount = null;
+ if (jsonNode.has(EXISTING_ROWS_COUNT)) {
+ existingRowsCount = JsonUtil.getLong(EXISTING_ROWS_COUNT, jsonNode);
+ }
+
+ Long deletedRowsCount = null;
+ if (jsonNode.has(DELETED_ROWS_COUNT)) {
+ deletedRowsCount = JsonUtil.getLong(DELETED_ROWS_COUNT, jsonNode);
+ }
+
+ List<ManifestFile.PartitionFieldSummary> partitionFieldSummaries = null;
+ if (jsonNode.has(PARTITION_FIELD_SUMMARY)) {
+ JsonNode summaryArray = JsonUtil.get(PARTITION_FIELD_SUMMARY, jsonNode);
+ Preconditions.checkArgument(
+ summaryArray.isArray(),
+ "Invalid JSON node for partition field summaries: non-array (%s)",
+ summaryArray);
+
+ ImmutableList.Builder<ManifestFile.PartitionFieldSummary> builder =
ImmutableList.builder();
+ for (JsonNode summaryNode : summaryArray) {
+ ManifestFile.PartitionFieldSummary summary =
+ PartitionFieldSummaryParser.fromJson(summaryNode);
+ builder.add(summary);
+ }
+
+ partitionFieldSummaries = builder.build();
+ }
+
+ ByteBuffer keyMetadata = JsonUtil.getByteBufferOrNull(KEY_METADATA,
jsonNode);
+
+ return new GenericManifestFile(
+ path,
+ length,
+ specId,
+ manifestContent,
+ sequenceNumber,
+ minSequenceNumber,
+ addedSnapshotId,
+ partitionFieldSummaries,
+ keyMetadata,
+ addedFilesCount,
+ addedRowsCount,
+ existingFilesCount,
+ existingRowsCount,
+ deletedFilesCount,
+ deletedRowsCount);
+ }
+
+ private static class PartitionFieldSummaryParser {
+ private static final String CONTAINS_NULL = "contains-null";
+ private static final String CONTAINS_NAN = "contains-nan";
+ private static final String LOWER_BOUND = "lower-bound";
+ private static final String UPPER_BOUND = "upper-bound";
+
+ private PartitionFieldSummaryParser() {}
+
+ static void toJson(ManifestFile.PartitionFieldSummary summary,
JsonGenerator generator)
+ throws IOException {
+ Preconditions.checkArgument(summary != null, "Invalid partition field
summary: null");
+ Preconditions.checkArgument(generator != null, "Invalid JSON generator:
null");
+
+ generator.writeStartObject();
+
+ generator.writeBooleanField(CONTAINS_NULL, summary.containsNull());
+
+ if (summary.containsNaN() != null) {
+ generator.writeBooleanField(CONTAINS_NAN, summary.containsNaN());
+ }
+
+ if (summary.lowerBound() != null) {
+ generator.writeFieldName(LOWER_BOUND);
+ SingleValueParser.toJson(Types.BinaryType.get(), summary.lowerBound(),
generator);
+ }
+
+ if (summary.upperBound() != null) {
+ generator.writeFieldName(UPPER_BOUND);
+ SingleValueParser.toJson(Types.BinaryType.get(), summary.upperBound(),
generator);
+ }
+
+ generator.writeEndObject();
+ }
+
+ static ManifestFile.PartitionFieldSummary fromJson(JsonNode jsonNode) {
+ Preconditions.checkArgument(
+ jsonNode != null, "Invalid JSON node for partition field summary:
null");
+ Preconditions.checkArgument(
+ jsonNode.isObject(),
+ "Invalid JSON node for partition field summary: non-object (%s)",
+ jsonNode);
+
+ boolean containsNull = JsonUtil.getBool(CONTAINS_NULL, jsonNode);
+ Boolean containsNaN = null;
+ if (jsonNode.has(CONTAINS_NAN)) {
+ containsNaN = JsonUtil.getBool(CONTAINS_NAN, jsonNode);
+ }
+
+ ByteBuffer lowerBound = null;
+ if (jsonNode.has(LOWER_BOUND)) {
+ lowerBound =
+ (ByteBuffer)
+ SingleValueParser.fromJson(Types.BinaryType.get(),
jsonNode.get(LOWER_BOUND));
+ }
+
+ ByteBuffer upperBound = null;
+ if (jsonNode.has(UPPER_BOUND)) {
+ upperBound =
+ (ByteBuffer)
+ SingleValueParser.fromJson(Types.BinaryType.get(),
jsonNode.get(UPPER_BOUND));
+ }
+
+ if (containsNaN != null) {
+ return new GenericPartitionFieldSummary(containsNull, containsNaN,
lowerBound, upperBound);
+ } else {
+ return new GenericPartitionFieldSummary(containsNull, lowerBound,
upperBound);
+ }
+ }
+ }
+}
diff --git a/core/src/main/java/org/apache/iceberg/ScanTaskParser.java
b/core/src/main/java/org/apache/iceberg/ScanTaskParser.java
index 9447d0668a..67e44cea7d 100644
--- a/core/src/main/java/org/apache/iceberg/ScanTaskParser.java
+++ b/core/src/main/java/org/apache/iceberg/ScanTaskParser.java
@@ -30,7 +30,10 @@ public class ScanTaskParser {
private enum TaskType {
FILE_SCAN_TASK("file-scan-task"),
- DATA_TASK("data-task");
+ DATA_TASK("data-task"),
+ FILES_TABLE_TASK("files-table-task"),
+ ALL_MANIFESTS_TABLE_TASK("all-manifests-table-task"),
+ MANIFEST_ENTRIES_TABLE_TASK("manifest-entries-task");
private final String value;
@@ -45,6 +48,12 @@ public class ScanTaskParser {
return FILE_SCAN_TASK;
} else if (DATA_TASK.typeName().equalsIgnoreCase(value)) {
return DATA_TASK;
+ } else if (FILES_TABLE_TASK.typeName().equalsIgnoreCase(value)) {
+ return FILES_TABLE_TASK;
+ } else if (ALL_MANIFESTS_TABLE_TASK.typeName().equalsIgnoreCase(value)) {
+ return ALL_MANIFESTS_TABLE_TASK;
+ } else if
(MANIFEST_ENTRIES_TABLE_TASK.typeName().equalsIgnoreCase(value)) {
+ return MANIFEST_ENTRIES_TABLE_TASK;
} else {
throw new IllegalArgumentException("Unknown task type: " + value);
}
@@ -74,6 +83,17 @@ public class ScanTaskParser {
if (fileScanTask instanceof StaticDataTask) {
generator.writeStringField(TASK_TYPE, TaskType.DATA_TASK.typeName());
DataTaskParser.toJson((StaticDataTask) fileScanTask, generator);
+ } else if (fileScanTask instanceof BaseFilesTable.ManifestReadTask) {
+ generator.writeStringField(TASK_TYPE,
TaskType.FILES_TABLE_TASK.typeName());
+ FilesTableTaskParser.toJson((BaseFilesTable.ManifestReadTask)
fileScanTask, generator);
+ } else if (fileScanTask instanceof AllManifestsTable.ManifestListReadTask)
{
+ generator.writeStringField(TASK_TYPE,
TaskType.ALL_MANIFESTS_TABLE_TASK.typeName());
+ AllManifestsTableTaskParser.toJson(
+ (AllManifestsTable.ManifestListReadTask) fileScanTask, generator);
+ } else if (fileScanTask instanceof BaseEntriesTable.ManifestReadTask) {
+ generator.writeStringField(TASK_TYPE,
TaskType.MANIFEST_ENTRIES_TABLE_TASK.typeName());
+ ManifestEntriesTableTaskParser.toJson(
+ (BaseEntriesTable.ManifestReadTask) fileScanTask, generator);
} else if (fileScanTask instanceof BaseFileScanTask
|| fileScanTask instanceof BaseFileScanTask.SplitScanTask) {
generator.writeStringField(TASK_TYPE,
TaskType.FILE_SCAN_TASK.typeName());
@@ -98,6 +118,12 @@ public class ScanTaskParser {
return FileScanTaskParser.fromJson(jsonNode, caseSensitive);
case DATA_TASK:
return DataTaskParser.fromJson(jsonNode);
+ case FILES_TABLE_TASK:
+ return FilesTableTaskParser.fromJson(jsonNode);
+ case ALL_MANIFESTS_TABLE_TASK:
+ return AllManifestsTableTaskParser.fromJson(jsonNode);
+ case MANIFEST_ENTRIES_TABLE_TASK:
+ return ManifestEntriesTableTaskParser.fromJson(jsonNode);
default:
throw new UnsupportedOperationException("Unsupported task type: " +
taskType.typeName());
}
diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java
b/core/src/main/java/org/apache/iceberg/TableMetadata.java
index 923db6bbd6..abb2ee6cc3 100644
--- a/core/src/main/java/org/apache/iceberg/TableMetadata.java
+++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java
@@ -43,6 +43,7 @@ import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.util.LocationUtil;
import org.apache.iceberg.util.Pair;
+import org.apache.iceberg.util.PartitionUtil;
import org.apache.iceberg.util.PropertyUtil;
import org.apache.iceberg.util.SerializableSupplier;
@@ -331,7 +332,7 @@ public class TableMetadata implements Serializable {
this.snapshotsById = indexAndValidateSnapshots(snapshots,
lastSequenceNumber);
this.schemasById = indexSchemas();
- this.specsById = indexSpecs(specs);
+ this.specsById = PartitionUtil.indexSpecs(specs);
this.sortOrdersById = indexSortOrders(sortOrders);
this.refs = validateRefs(currentSnapshotId, refs, snapshotsById);
this.statisticsFiles = ImmutableList.copyOf(statisticsFiles);
@@ -810,14 +811,6 @@ public class TableMetadata implements Serializable {
return builder.build();
}
- private static Map<Integer, PartitionSpec> indexSpecs(List<PartitionSpec>
specs) {
- ImmutableMap.Builder<Integer, PartitionSpec> builder =
ImmutableMap.builder();
- for (PartitionSpec spec : specs) {
- builder.put(spec.specId(), spec);
- }
- return builder.build();
- }
-
private static Map<Integer, SortOrder> indexSortOrders(List<SortOrder>
sortOrders) {
ImmutableMap.Builder<Integer, SortOrder> builder = ImmutableMap.builder();
for (SortOrder sortOrder : sortOrders) {
@@ -1054,7 +1047,7 @@ public class TableMetadata implements Serializable {
this.specs =
Lists.newArrayList(Iterables.transform(specs, spec ->
updateSpecSchema(schema, spec)));
specsById.clear();
- specsById.putAll(indexSpecs(specs));
+ specsById.putAll(PartitionUtil.indexSpecs(specs));
this.sortOrders =
Lists.newArrayList(
diff --git a/core/src/main/java/org/apache/iceberg/io/FileIOParser.java
b/core/src/main/java/org/apache/iceberg/io/FileIOParser.java
index 90ace239c2..2743b7b616 100644
--- a/core/src/main/java/org/apache/iceberg/io/FileIOParser.java
+++ b/core/src/main/java/org/apache/iceberg/io/FileIOParser.java
@@ -40,7 +40,7 @@ public class FileIOParser {
return JsonUtil.generate(gen -> toJson(io, gen), pretty);
}
- private static void toJson(FileIO io, JsonGenerator generator) throws
IOException {
+ public static void toJson(FileIO io, JsonGenerator generator) throws
IOException {
String impl = io.getClass().getName();
Map<String, String> properties;
try {
@@ -72,7 +72,7 @@ public class FileIOParser {
return JsonUtil.parse(json, node -> fromJson(node, conf));
}
- private static FileIO fromJson(JsonNode json, Object conf) {
+ public static FileIO fromJson(JsonNode json, Object conf) {
Preconditions.checkArgument(json.isObject(), "Cannot parse FileIO from
non-object: %s", json);
String impl = JsonUtil.getString(FILE_IO_IMPL, json);
Map<String, String> properties = JsonUtil.getStringMap(PROPERTIES, json);
diff --git a/core/src/main/java/org/apache/iceberg/util/PartitionUtil.java
b/core/src/main/java/org/apache/iceberg/util/PartitionUtil.java
index 4a5f0c792b..8373593931 100644
--- a/core/src/main/java/org/apache/iceberg/util/PartitionUtil.java
+++ b/core/src/main/java/org/apache/iceberg/util/PartitionUtil.java
@@ -26,6 +26,7 @@ import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.StructLike;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;
@@ -98,4 +99,13 @@ public class PartitionUtil {
projection.wrap(partition);
return projection;
}
+
+ public static Map<Integer, PartitionSpec> indexSpecs(List<PartitionSpec>
specs) {
+ ImmutableMap.Builder<Integer, PartitionSpec> builder =
ImmutableMap.builder();
+ for (PartitionSpec spec : specs) {
+ builder.put(spec.specId(), spec);
+ }
+
+ return builder.build();
+ }
}
diff --git
a/core/src/test/java/org/apache/iceberg/TestAllManifestsTableTaskParser.java
b/core/src/test/java/org/apache/iceberg/TestAllManifestsTableTaskParser.java
new file mode 100644
index 0000000000..2f057d7bd5
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/TestAllManifestsTableTaskParser.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.io.StringWriter;
+import java.util.Arrays;
+import java.util.Map;
+import org.apache.iceberg.expressions.Expressions;
+import org.apache.iceberg.hadoop.HadoopFileIO;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.util.JsonUtil;
+import org.apache.iceberg.util.PartitionUtil;
+import org.junit.jupiter.api.Test;
+
+public class TestAllManifestsTableTaskParser {
+ @Test
+ public void nullCheck() throws Exception {
+ StringWriter writer = new StringWriter();
+ JsonGenerator generator = JsonUtil.factory().createGenerator(writer);
+
+ assertThatThrownBy(() -> AllManifestsTableTaskParser.toJson(null,
generator))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Invalid manifest task: null");
+
+ assertThatThrownBy(() -> AllManifestsTableTaskParser.toJson(createTask(),
null))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Invalid JSON generator: null");
+
+ assertThatThrownBy(() -> AllManifestsTableTaskParser.fromJson(null))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Invalid JSON node for manifest task: null");
+ }
+
+ @Test
+ public void invalidJsonNode() throws Exception {
+ String jsonStr = "{\"str\":\"1\", \"arr\":[]}";
+ ObjectMapper mapper = new ObjectMapper();
+ JsonNode rootNode = mapper.reader().readTree(jsonStr);
+
+ assertThatThrownBy(() ->
AllManifestsTableTaskParser.fromJson(rootNode.get("str")))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("Invalid JSON node for manifest task: non-object
");
+
+ assertThatThrownBy(() ->
AllManifestsTableTaskParser.fromJson(rootNode.get("arr")))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("Invalid JSON node for manifest task: non-object
");
+ }
+
+ @Test
+ public void testParser() {
+ AllManifestsTable.ManifestListReadTask task = createTask();
+ String jsonStr = ScanTaskParser.toJson(task);
+ assertThat(jsonStr).isEqualTo(taskJson());
+ AllManifestsTable.ManifestListReadTask deserializedTask =
+ (AllManifestsTable.ManifestListReadTask)
ScanTaskParser.fromJson(jsonStr, false);
+ assertTaskEquals(task, deserializedTask);
+ }
+
+ private AllManifestsTable.ManifestListReadTask createTask() {
+ Schema dataTableSchema = TestBase.SCHEMA;
+ HadoopFileIO fileIO = new HadoopFileIO();
+ fileIO.initialize(ImmutableMap.of("k1", "v1", "k2", "v2"));
+ Map<Integer, PartitionSpec> specsById =
+ PartitionUtil.indexSpecs(
+
Arrays.asList(PartitionSpec.builderFor(dataTableSchema).bucket("data",
16).build()));
+
+ return new AllManifestsTable.ManifestListReadTask(
+ dataTableSchema,
+ fileIO,
+ AllManifestsTable.MANIFEST_FILE_SCHEMA,
+ specsById,
+ "/path/manifest-list-file.avro",
+ Expressions.equal("id", 1),
+ 1L);
+ }
+
+ private String taskJson() {
+ return "{\"task-type\":\"all-manifests-table-task\","
+ + "\"data-table-schema\":{\"type\":\"struct\",\"schema-id\":0,"
+ +
"\"fields\":[{\"id\":3,\"name\":\"id\",\"required\":true,\"type\":\"int\"},"
+ +
"{\"id\":4,\"name\":\"data\",\"required\":true,\"type\":\"string\"}]},"
+ +
"\"file-io\":{\"io-impl\":\"org.apache.iceberg.hadoop.HadoopFileIO\","
+ + "\"properties\":{\"k1\":\"v1\",\"k2\":\"v2\"}},"
+ + "\"schema\":{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{"
+ + "\"id\":14,\"name\":\"content\",\"required\":true,\"type\":\"int\"},"
+ + "{\"id\":1,\"name\":\"path\",\"required\":true,\"type\":\"string\"},"
+ + "{\"id\":2,\"name\":\"length\",\"required\":true,\"type\":\"long\"},"
+ +
"{\"id\":3,\"name\":\"partition_spec_id\",\"required\":false,\"type\":\"int\"},"
+ +
"{\"id\":4,\"name\":\"added_snapshot_id\",\"required\":false,\"type\":\"long\"},"
+ +
"{\"id\":5,\"name\":\"added_data_files_count\",\"required\":false,\"type\":\"int\"},"
+ +
"{\"id\":6,\"name\":\"existing_data_files_count\",\"required\":false,\"type\":\"int\"},"
+ +
"{\"id\":7,\"name\":\"deleted_data_files_count\",\"required\":false,\"type\":\"int\"},"
+ +
"{\"id\":15,\"name\":\"added_delete_files_count\",\"required\":true,\"type\":\"int\"},"
+ +
"{\"id\":16,\"name\":\"existing_delete_files_count\",\"required\":true,\"type\":\"int\"},"
+ +
"{\"id\":17,\"name\":\"deleted_delete_files_count\",\"required\":true,\"type\":\"int\"},"
+ +
"{\"id\":8,\"name\":\"partition_summaries\",\"required\":false,\"type\":"
+ +
"{\"type\":\"list\",\"element-id\":9,\"element\":{\"type\":\"struct\",\"fields\":[{"
+ +
"\"id\":10,\"name\":\"contains_null\",\"required\":true,\"type\":\"boolean\"},"
+ +
"{\"id\":11,\"name\":\"contains_nan\",\"required\":true,\"type\":\"boolean\"},"
+ +
"{\"id\":12,\"name\":\"lower_bound\",\"required\":false,\"type\":\"string\"},"
+ +
"{\"id\":13,\"name\":\"upper_bound\",\"required\":false,\"type\":\"string\"}]},\"element-required\":true}},"
+ +
"{\"id\":18,\"name\":\"reference_snapshot_id\",\"required\":true,\"type\":\"long\"}]},"
+ +
"\"partition-specs\":[{\"spec-id\":0,\"fields\":[{\"name\":\"data_bucket\","
+ + "\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]}],"
+ + "\"manifest-list-Location\":\"/path/manifest-list-file.avro\","
+ + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1},"
+ + "\"reference-snapshot-id\":1}";
+ }
+
+ private void assertTaskEquals(
+ AllManifestsTable.ManifestListReadTask expected,
+ AllManifestsTable.ManifestListReadTask actual) {
+
+ HadoopFileIO expectedIO = (HadoopFileIO) expected.io();
+ HadoopFileIO actualIO = (HadoopFileIO) actual.io();
+ assertThat(actualIO.properties()).isEqualTo(expectedIO.properties());
+
+ assertThat(actual.dataTableSchema().asStruct())
+ .as("Data table schema should match")
+ .isEqualTo(expected.dataTableSchema().asStruct());
+ assertThat(actual.schema().asStruct())
+ .as("Schema should match")
+ .isEqualTo(expected.schema().asStruct());
+
+ assertThat(actual.specsById()).isEqualTo(expected.specsById());
+
assertThat(actual.manifestListLocation()).isEqualTo(expected.manifestListLocation());
+
assertThat(actual.residual().toString()).isEqualTo(expected.residual().toString());
+
assertThat(actual.referenceSnapshotId()).isEqualTo(expected.referenceSnapshotId());
+ }
+}
diff --git
a/core/src/test/java/org/apache/iceberg/TestFilesTableTaskParser.java
b/core/src/test/java/org/apache/iceberg/TestFilesTableTaskParser.java
new file mode 100644
index 0000000000..bea6060137
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/TestFilesTableTaskParser.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.io.StringWriter;
+import java.util.Arrays;
+import java.util.Map;
+import org.apache.iceberg.expressions.Expressions;
+import org.apache.iceberg.hadoop.HadoopFileIO;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.util.JsonUtil;
+import org.apache.iceberg.util.PartitionUtil;
+import org.junit.jupiter.api.Test;
+
+public class TestFilesTableTaskParser {
+ @Test
+ public void nullCheck() throws Exception {
+ StringWriter writer = new StringWriter();
+ JsonGenerator generator = JsonUtil.factory().createGenerator(writer);
+
+ assertThatThrownBy(() -> FilesTableTaskParser.toJson(null, generator))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Invalid files task: null");
+
+ assertThatThrownBy(() -> FilesTableTaskParser.toJson(createTask(), null))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Invalid JSON generator: null");
+
+ assertThatThrownBy(() -> FilesTableTaskParser.fromJson(null))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Invalid JSON node for files task: null");
+ }
+
+ @Test
+ public void invalidJsonNode() throws Exception {
+ String jsonStr = "{\"str\":\"1\", \"arr\":[]}";
+ ObjectMapper mapper = new ObjectMapper();
+ JsonNode rootNode = mapper.reader().readTree(jsonStr);
+
+ assertThatThrownBy(() ->
FilesTableTaskParser.fromJson(rootNode.get("str")))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("Invalid JSON node for files task: non-object ");
+
+ assertThatThrownBy(() ->
FilesTableTaskParser.fromJson(rootNode.get("arr")))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("Invalid JSON node for files task: non-object ");
+ }
+
+ @Test
+ public void testParser() {
+ BaseFilesTable.ManifestReadTask task = createTask();
+ String jsonStr = ScanTaskParser.toJson(task);
+ assertThat(jsonStr).isEqualTo(taskJson());
+ BaseFilesTable.ManifestReadTask deserializedTask =
+ (BaseFilesTable.ManifestReadTask) ScanTaskParser.fromJson(jsonStr,
false);
+ assertTaskEquals(task, deserializedTask);
+ }
+
+ private BaseFilesTable.ManifestReadTask createTask() {
+ Schema schema = TestBase.SCHEMA;
+ HadoopFileIO fileIO = new HadoopFileIO();
+ fileIO.initialize(ImmutableMap.of("k1", "v1", "k2", "v2"));
+ Map<Integer, PartitionSpec> specsById =
+ PartitionUtil.indexSpecs(
+ Arrays.asList(PartitionSpec.builderFor(schema).bucket("data",
16).build()));
+ ManifestFile manifestFile = TestManifestFileParser.createManifestFile();
+ return new BaseFilesTable.ManifestReadTask(
+ schema, fileIO, specsById, manifestFile, schema,
Expressions.equal("id", 1));
+ }
+
+ private String taskJson() {
+ return "{\"task-type\":\"files-table-task\","
+ + "\"schema\":{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{"
+ + "\"id\":3,\"name\":\"id\",\"required\":true,\"type\":\"int\"},"
+ +
"{\"id\":4,\"name\":\"data\",\"required\":true,\"type\":\"string\"}]},"
+ + "\"projection\":{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{"
+ + "\"id\":3,\"name\":\"id\",\"required\":true,\"type\":\"int\"},"
+ +
"{\"id\":4,\"name\":\"data\",\"required\":true,\"type\":\"string\"}]},"
+ +
"\"file-io\":{\"io-impl\":\"org.apache.iceberg.hadoop.HadoopFileIO\","
+ + "\"properties\":{\"k1\":\"v1\",\"k2\":\"v2\"}},"
+ + "\"partition-specs\":[{\"spec-id\":0,\"fields\":[{"
+ +
"\"name\":\"data_bucket\",\"transform\":\"bucket[16]\",\"source-id\":4,\"field-id\":1000}]}],"
+ + "\"residual-filter\":{\"type\":\"eq\",\"term\":\"id\",\"value\":1},"
+ + "\"manifest-file\":{\"path\":\"/path/input.m0.avro\","
+ +
"\"length\":5878,\"partition-spec-id\":0,\"content\":0,\"sequence-number\":1,\"min-sequence-number\":2,"
+ + "\"added-snapshot-id\":12345678901234567,"
+ +
"\"added-files-count\":1,\"existing-files-count\":3,\"deleted-files-count\":0,"
+ +
"\"added-rows-count\":10,\"existing-rows-count\":30,\"deleted-rows-count\":0,"
+ +
"\"partition-field-summary\":[{\"contains-null\":true,\"contains-nan\":false,"
+ +
"\"lower-bound\":\"0A000000\",\"upper-bound\":\"64000000\"}],\"key-metadata\":\"DB030000\"}}";
+ }
+
+ private void assertTaskEquals(
+ BaseFilesTable.ManifestReadTask expected,
BaseFilesTable.ManifestReadTask actual) {
+ assertThat(actual.schema().asStruct())
+ .as("Schema should match")
+ .isEqualTo(expected.schema().asStruct());
+ assertThat(actual.projection().asStruct())
+ .as("Projected schema should match")
+ .isEqualTo(expected.projection().asStruct());
+
+ HadoopFileIO expectedIO = (HadoopFileIO) expected.io();
+ HadoopFileIO actualIO = (HadoopFileIO) actual.io();
+ assertThat(actualIO.properties()).isEqualTo(expectedIO.properties());
+
+ assertThat(actual.specsById()).isEqualTo(expected.specsById());
+
assertThat(actual.residual().toString()).isEqualTo(expected.residual().toString());
+ assertThat(actual.manifest()).isEqualTo(expected.manifest());
+ }
+}
diff --git a/core/src/test/java/org/apache/iceberg/TestManifestFileParser.java
b/core/src/test/java/org/apache/iceberg/TestManifestFileParser.java
new file mode 100644
index 0000000000..5a6e99c984
--- /dev/null
+++ b/core/src/test/java/org/apache/iceberg/TestManifestFileParser.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.io.StringWriter;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.iceberg.types.Conversions;
+import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.JsonUtil;
+import org.junit.jupiter.api.Test;
+
+/** Tests for {@link ManifestFileParser} JSON serialization of {@link ManifestFile}. */
+public class TestManifestFileParser {
+  @Test
+  public void nullCheck() throws Exception {
+    StringWriter jsonWriter = new StringWriter();
+    JsonGenerator gen = JsonUtil.factory().createGenerator(jsonWriter);
+
+    // every argument of toJson/fromJson is required to be non-null
+    assertThatThrownBy(() -> ManifestFileParser.toJson(null, gen))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Invalid manifest file: null");
+
+    assertThatThrownBy(() -> ManifestFileParser.toJson(createManifestFile(), null))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Invalid JSON generator: null");
+
+    assertThatThrownBy(() -> ManifestFileParser.fromJson(null))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessage("Invalid JSON node for manifest file: null");
+  }
+
+  @Test
+  public void invalidJsonNode() throws Exception {
+    // only a JSON object can represent a manifest file; strings and arrays must be rejected
+    String json = "{\"str\":\"1\", \"arr\":[]}";
+    JsonNode root = new ObjectMapper().reader().readTree(json);
+
+    assertThatThrownBy(() -> ManifestFileParser.fromJson(root.get("str")))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessageContaining("Invalid JSON node for manifest file: non-object ");
+
+    assertThatThrownBy(() -> ManifestFileParser.fromJson(root.get("arr")))
+        .isInstanceOf(IllegalArgumentException.class)
+        .hasMessageContaining("Invalid JSON node for manifest file: non-object ");
+  }
+
+  @Test
+  public void testParser() throws Exception {
+    ManifestFile manifestFile = createManifestFile();
+    String json = JsonUtil.generate(gen -> ManifestFileParser.toJson(manifestFile, gen), false);
+    assertThat(json).isEqualTo(manifestFileJson());
+  }
+
+  /** Creates a fully populated manifest file fixture for serialization tests. */
+  static ManifestFile createManifestFile() {
+    ByteBuffer lowerBound = Conversions.toByteBuffer(Types.IntegerType.get(), 10);
+    ByteBuffer upperBound = Conversions.toByteBuffer(Types.IntegerType.get(), 100);
+    ByteBuffer keyMetadata = Conversions.toByteBuffer(Types.IntegerType.get(), 987);
+    List<ManifestFile.PartitionFieldSummary> summaries =
+        Arrays.asList(new GenericPartitionFieldSummary(true, false, lowerBound, upperBound));
+
+    return new GenericManifestFile(
+        "/path/input.m0.avro",
+        5878L,
+        0,
+        ManifestContent.DATA,
+        1L,
+        2L,
+        12345678901234567L,
+        1,
+        10L,
+        3,
+        30L,
+        0,
+        0L,
+        summaries,
+        keyMetadata);
+  }
+
+  /** Expected JSON for the fixture returned by {@link #createManifestFile()}. */
+  private String manifestFileJson() {
+    return "{\"path\":\"/path/input.m0.avro\","
+        + "\"length\":5878,\"partition-spec-id\":0,\"content\":0,\"sequence-number\":1,\"min-sequence-number\":2,"
+        + "\"added-snapshot-id\":12345678901234567,\"added-files-count\":1,\"existing-files-count\":3,\"deleted-files-count\":0,"
+        + "\"added-rows-count\":10,\"existing-rows-count\":30,\"deleted-rows-count\":0,"
+        + "\"partition-field-summary\":[{\"contains-null\":true,\"contains-nan\":false,"
+        + "\"lower-bound\":\"0A000000\",\"upper-bound\":\"64000000\"}],"
+        + "\"key-metadata\":\"DB030000\"}";
+  }
+}