github-actions[bot] commented on code in PR #65135:
URL: https://github.com/apache/doris/pull/65135#discussion_r3510140252


##########
fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java:
##########
@@ -896,14 +954,66 @@ private Split createIcebergSplit(FileScanTask 
fileScanTask) {
         return split;
     }
 
-    private Split createIcebergSysSplit(FileScanTask fileScanTask) {
-        long rowCount = fileScanTask.file() == null ? 1 : 
fileScanTask.file().recordCount();
+    private Split createIcebergSysSplit(ScanTask scanTask) {
+        long rowCount = Math.max(scanTask.estimatedRowsCount(), 1L);
+        if (scanTask.isFileScanTask() && scanTask.asFileScanTask().file() != 
null) {
+            rowCount = 
Math.max(scanTask.asFileScanTask().file().recordCount(), 1L);
+        }
         IcebergSplit split = IcebergSplit.newSysTableSplit(
-                SerializationUtil.serializeToBase64(fileScanTask), rowCount);
+                SerializationUtil.serializeToBase64(scanTask), rowCount);
         split.setTableFormatType(TableFormatType.ICEBERG);
         return split;
     }
 
+    private Split createIcebergPositionDeleteSysSplit(PositionDeletesScanTask 
task) {
+        DeleteFile deleteFile = task.file();
+        String originalPath = deleteFile.path().toString();
+        LocationPath locationPath = createLocationPathWithCache(originalPath);
+        IcebergSplit split = IcebergSplit.newPositionDeleteSysTableSplit(
+                locationPath, task.start(), task.length(), 
deleteFile.fileSizeInBytes(),
+                storagePropertiesMap, originalPath);
+        split.setTableFormatType(TableFormatType.ICEBERG_POSITION_DELETES);
+        
split.setPositionDeleteFileFormat(getNativePositionDeleteFileFormat(deleteFile.format()));
+        split.setPositionDeleteOriginalPath(originalPath);
+        if (deleteFile.format() == FileFormat.PUFFIN) {
+            
split.setPositionDeleteContent(IcebergDeleteFileFilter.DeletionVector.type());
+            
split.setPositionDeleteReferencedDataFilePath(deleteFile.referencedDataFile());
+            split.setPositionDeleteContentOffset(deleteFile.contentOffset());
+            
split.setPositionDeleteContentSizeInBytes(deleteFile.contentSizeInBytes());
+        } else {
+            
split.setPositionDeleteContent(IcebergDeleteFileFilter.PositionDelete.type());
+        }
+
+        split.setPartitionSpecId(deleteFile.specId());
+        PartitionSpec partitionSpec = 
icebergTable.specs().get(deleteFile.specId());
+        if (partitionSpec != null && partitionSpec.isPartitioned() && 
deleteFile.partition() != null) {
+            split.setPartitionDataJson(getPartitionDataObjectJson(
+                    (PartitionData) deleteFile.partition(), partitionSpec));
+        }
+        return split;
+    }
+
+    private TFileFormatType getNativePositionDeleteFileFormat(FileFormat 
fileFormat) {
+        if (fileFormat == FileFormat.PARQUET || fileFormat == 
FileFormat.PUFFIN) {
+            return TFileFormatType.FORMAT_PARQUET;
+        } else if (fileFormat == FileFormat.ORC) {
+            return TFileFormatType.FORMAT_ORC;
+        }
+        throw new UnsupportedOperationException(
+                "Unsupported Iceberg position delete file format: " + 
fileFormat);
+    }
+
+    private String getPartitionDataObjectJson(PartitionData partitionData, 
PartitionSpec partitionSpec) {
+        List<String> partitionValues = IcebergUtils.getPartitionValues(
+                partitionData, partitionSpec, sessionVariable.getTimeZone());
+        Map<String, String> partitionJson = new LinkedHashMap<>();
+        List<PartitionField> fields = partitionSpec.fields();
+        for (int i = 0; i < fields.size(); i++) {
+            partitionJson.put(fields.get(i).name(), partitionValues.get(i));

Review Comment:
   The partition payload here loses the actual JSON types. 
`getPartitionValues()` returns strings for every Iceberg partition value, and 
`Gson` serializes this `Map<String, String>` as e.g. `{"id":"1"}` for an 
integer partition. On BE, `_append_partition_column()` deserializes that object 
straight into the `partition` struct column; the struct serde forwards `"1"` to 
the numeric field serde, which does not strip JSON quotes, and nullable struct 
fields then turn the parse failure into NULL. So a table partitioned by an 
integer/date field will expose wrong `partition` values in `$position_deletes` 
even though the new tests only cover string `dt` partitions. Please preserve 
typed JSON values here, and add a regression with at least one non-string 
partition column.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to