This is an automated email from the ASF dual-hosted git repository.
dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new ff1c1cc99 IMPALA-13467: Fix partition list size calculation for empty
Iceberg scan nodes
ff1c1cc99 is described below
commit ff1c1cc99d4bc0633c9aa0c28edd5601c3186b8a
Author: Peter Rozsa <[email protected]>
AuthorDate: Tue Oct 22 01:57:23 2024 -0700
IMPALA-13467: Fix partition list size calculation for empty Iceberg scan
nodes
This patch adds a condition that checks whether the IcebergScanNode
contains any files before using the size of the partition list. The
partition list size of Iceberg tables is always one regardless of the
scanned files. This behavior can cause an NPE in runtime filter generation.
By treating an Iceberg scan node without files as if its partition list
were empty (size 0), runtime filter generation is skipped for it.
Change-Id: I5a0595831f3bd87074144ab7d5da27508e73ef33
Reviewed-on: http://gerrit.cloudera.org:8080/21964
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
.../org/apache/impala/planner/HdfsScanNode.java | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index a72eb3df8..ca992b542 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -1210,9 +1210,22 @@ public class HdfsScanNode extends ScanNode {
totalBytesPerFsEC_ = new TreeMap<>();
Preconditions.checkState((sampleParams_ == null) == (sampledPartitions_ ==
null));
- int partitionsSize = getSampledOrRawPartitions().size();
- boolean allParquet = (partitionsSize > 0) ? true : false;
- boolean allColumnarFormat = (partitionsSize > 0) ? true : false;
+ // Assume all table files are in parquet format and all in columnar format
+ // until proven otherwise.
+ boolean allParquet = true;
+ boolean allColumnarFormat = true;
+
+ if (this instanceof IcebergScanNode && this.fileFormats_.isEmpty()) {
+ // Iceberg tables always have one partition, even if the scan node
contains zero
+ // file descriptors. TODO: IMPALA-13267
+ allParquet = false;
+ allColumnarFormat = false;
+ } else {
+ // If table has no partition, then it is not all parquet, nor is it all
columnar.
+ int partitionsSize = getSampledOrRawPartitions().size();
+ allParquet = partitionsSize > 0;
+ allColumnarFormat = partitionsSize > 0;
+ }
long simpleLimitNumRows = 0; // only used for the simple limit case
boolean isSimpleLimit = sampleParams_ == null &&
(analyzer.getQueryCtx().client_request.getQuery_options()
@@ -1230,7 +1243,7 @@ public class HdfsScanNode extends ScanNode {
String lastFsAuthority = null;
FileSystem lastFileSytem = null;
for (FeFsPartition partition : getSampledOrRawPartitions()) {
- // Save location to local variable beacuse getLocation() can be slow as
it needs to
+ // Save location to local variable because getLocation() can be slow as
it needs to
// decompress the partition's location.
String partitionLocation = partition.getLocation();
Path partitionPath = new Path(partitionLocation);