This is an automated email from the ASF dual-hosted git repository.
stevenwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 21d85642f4 Core: add missing start and length for FileScanTaskParser.
Also added `schema()` override for BaseFileScanTask$SplitScanTask class. (#7936)
21d85642f4 is described below
commit 21d85642f492169f7c3aaf39802a4bbe73873aa1
Author: Steven Zhen Wu <[email protected]>
AuthorDate: Mon Jul 3 08:06:01 2023 -0700
Core: add missing start and length for FileScanTaskParser. Also added
`schema()` override for BaseFileScanTask$SplitScanTask class. (#7936)
---
core/src/main/java/org/apache/iceberg/BaseFileScanTask.java | 5 +++++
.../src/main/java/org/apache/iceberg/FileScanTaskParser.java | 12 +++++++++++-
.../test/java/org/apache/iceberg/TestFileScanTaskParser.java | 1 +
3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/iceberg/BaseFileScanTask.java b/core/src/main/java/org/apache/iceberg/BaseFileScanTask.java
index bff2d724f7..f53b56696e 100644
--- a/core/src/main/java/org/apache/iceberg/BaseFileScanTask.java
+++ b/core/src/main/java/org/apache/iceberg/BaseFileScanTask.java
@@ -80,6 +80,11 @@ public class BaseFileScanTask extends BaseContentScanTask<FileScanTask, DataFile
return fileScanTask.deletes();
}
+ @Override
+ public Schema schema() {
+ return fileScanTask.schema();
+ }
+
@Override
public PartitionSpec spec() {
return fileScanTask.spec();
diff --git a/core/src/main/java/org/apache/iceberg/FileScanTaskParser.java b/core/src/main/java/org/apache/iceberg/FileScanTaskParser.java
index b747eff98b..0a708f2668 100644
--- a/core/src/main/java/org/apache/iceberg/FileScanTaskParser.java
+++ b/core/src/main/java/org/apache/iceberg/FileScanTaskParser.java
@@ -33,6 +33,8 @@ public class FileScanTaskParser {
private static final String SCHEMA = "schema";
private static final String SPEC = "spec";
private static final String DATA_FILE = "data-file";
+ private static final String START = "start";
+ private static final String LENGTH = "length";
private static final String DELETE_FILES = "delete-files";
private static final String RESIDUAL = "residual-filter";
@@ -61,6 +63,9 @@ public class FileScanTaskParser {
ContentFileParser.toJson(fileScanTask.file(), spec, generator);
}
+ generator.writeNumberField(START, fileScanTask.start());
+ generator.writeNumberField(LENGTH, fileScanTask.length());
+
if (fileScanTask.deletes() != null) {
generator.writeArrayFieldStart(DELETE_FILES);
for (DeleteFile deleteFile : fileScanTask.deletes()) {
@@ -98,6 +103,9 @@ public class FileScanTaskParser {
dataFile = (DataFile) ContentFileParser.fromJson(jsonNode.get(DATA_FILE), spec);
}
+ long start = JsonUtil.getLong(START, jsonNode);
+ long length = JsonUtil.getLong(LENGTH, jsonNode);
+
DeleteFile[] deleteFiles = null;
if (jsonNode.has(DELETE_FILES)) {
JsonNode deletesArray = jsonNode.get(DELETE_FILES);
@@ -121,6 +129,8 @@ public class FileScanTaskParser {
}
ResidualEvaluator residualEvaluator = ResidualEvaluator.of(spec, filter, caseSensitive);
- return new BaseFileScanTask(dataFile, deleteFiles, schemaString, specString, residualEvaluator);
+ BaseFileScanTask baseFileScanTask =
+ new BaseFileScanTask(dataFile, deleteFiles, schemaString, specString, residualEvaluator);
+ return new BaseFileScanTask.SplitScanTask(start, length, baseFileScanTask);
}
}
diff --git a/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java b/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java
index 221a5507b1..42785c7a77 100644
--- a/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java
+++ b/core/src/test/java/org/apache/iceberg/TestFileScanTaskParser.java
@@ -74,6 +74,7 @@ public class TestFileScanTaskParser {
+ "\"data-file\":{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\","
+ "\"file-format\":\"PARQUET\",\"partition\":{\"1000\":0},"
+ "\"file-size-in-bytes\":10,\"record-count\":1,\"sort-order-id\":0},"
+ + "\"start\":0,\"length\":10,"
+ "\"delete-files\":[{\"spec-id\":0,\"content\":\"POSITION_DELETES\","
+ "\"file-path\":\"/path/to/data-a-deletes.parquet\",\"file-format\":\"PARQUET\","
+ "\"partition\":{\"1000\":0},\"file-size-in-bytes\":10,\"record-count\":1},"