Repository: incubator-impala Updated Branches: refs/heads/master 7fc31b534 -> 4918b20ac
IMPALA-4408: Omit null bytes for Kudu scans with no nullable slots. Kudu does not allocate null bytes if all projected columns are non-nullable. Otherwise, Kudu allocates a null bit for all columns, even the non-nullable ones. The bug was that Impala's memory layout did not match the first requirement. Change-Id: I762ad9d5cc4198922ea4b5218c504fde355c49a5 Reviewed-on: http://gerrit.cloudera.org:8080/4892 Reviewed-by: Tim Armstrong <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/4918b20a Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/4918b20a Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/4918b20a Branch: refs/heads/master Commit: 4918b20ac0a46d5dbc4455f0bb3e39eabe7f1cbd Parents: 7fc31b5 Author: Alex Behm <[email protected]> Authored: Mon Oct 31 14:29:42 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Tue Nov 1 01:47:30 2016 +0000 ---------------------------------------------------------------------- .../apache/impala/analysis/SlotDescriptor.java | 1 - .../apache/impala/analysis/TupleDescriptor.java | 26 ++++++++++++++++---- .../queries/QueryTest/kudu-scan-node.test | 8 ++++++ 3 files changed, 29 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4918b20a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java index 9a9c058..109cfa2 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java +++ b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java @@ -142,7 +142,6 @@ public class SlotDescriptor { public Path getPath() { return path_; } public boolean isScanSlot() { return path_ != null && path_.isRootedAtTable(); } - public boolean isKuduScanSlot() { return getColumn() instanceof KuduColumn; } public Column getColumn() { return !isScanSlot() ? null : path_.destColumn(); } public ColumnStats getStats() { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4918b20a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java index bf6b93a..8dab33a 100644 --- a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java +++ b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java @@ -27,6 +27,7 @@ import java.util.Map; import org.apache.commons.lang.StringUtils; import org.apache.impala.catalog.ColumnStats; import org.apache.impala.catalog.HdfsTable; +import org.apache.impala.catalog.KuduTable; import org.apache.impala.catalog.StructType; import org.apache.impala.catalog.Table; import org.apache.impala.thrift.TTupleDescriptor; @@ -59,9 +60,11 @@ import com.google.common.collect.Lists; * * Memory Layout * Slots are placed in descending order by size with trailing bytes to store null flags. - * Null flags are omitted for non-nullable slots, except for Kudu scan slots which always - * have a null flag to match Kudu's client row format. There is no padding between tuples - * when stored back-to-back in a row batch. + * Null flags are omitted for non-nullable slots, with the following exceptions for Kudu + * scan tuples to match Kudu's client row format: If there is at least one nullable Kudu + * scan slot, then all slots (even non-nullable ones) get a null flag. If there are no + * nullable Kudu scan slots, then there are also no null flags. + * There is no padding between tuples when stored back-to-back in a row batch. * * Example: select bool_col, int_col, string_col, smallint_col from functional.alltypes * Slots: string_col|int_col|smallint_col|bool_col|null_byte @@ -232,6 +235,8 @@ public class TupleDescriptor { if (hasMemLayout_) return; hasMemLayout_ = true; + boolean alwaysAddNullBit = hasNullableKuduScanSlots(); + // maps from slot size to slot descriptors with that size Map<Integer, List<SlotDescriptor>> slotsBySize = new HashMap<Integer, List<SlotDescriptor>>(); @@ -253,7 +258,7 @@ public class TupleDescriptor { } totalSlotSize += d.getType().getSlotSize(); slotsBySize.get(d.getType().getSlotSize()).add(d); - if (d.getIsNullable() || d.isKuduScanSlot()) ++numNullBits; + if (d.getIsNullable() || alwaysAddNullBit) ++numNullBits; } // we shouldn't have anything of size <= 0 Preconditions.checkState(!slotsBySize.containsKey(0)); @@ -280,7 +285,7 @@ public class TupleDescriptor { slotOffset += slotSize; // assign null indicator - if (d.getIsNullable() || d.isKuduScanSlot()) { + if (d.getIsNullable() || alwaysAddNullBit) { d.setNullIndicatorByte(nullIndicatorByte); d.setNullIndicatorBit(nullIndicatorBit); nullIndicatorBit = (nullIndicatorBit + 1) % 8; @@ -301,6 +306,17 @@ public class TupleDescriptor { } /** + * Returns true if this tuple has at least one materialized nullable Kudu scan slot. + */ + private boolean hasNullableKuduScanSlots() { + if (!(getTable() instanceof KuduTable)) return false; + for (SlotDescriptor d: slots_) { + if (d.isMaterialized() && d.getIsNullable()) return true; + } + return false; + } + + /** * Return true if the slots being materialized are all partition columns. */ public boolean hasClusteringColsOnly() { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4918b20a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test index 494bb58..1017658 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test +++ b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test @@ -47,3 +47,11 @@ SELECT * FROM impala_2635_t1 UNION ALL SELECT * FROM impala_2635_t2; ---- TYPES BIGINT, STRING ==== +---- QUERY +# IMPALA-4408: Test Kudu scans where all materialized slots are non-nullable. +select count(int_col) from functional_kudu.tinyinttable +---- RESULTS +10 +---- TYPES +BIGINT +====
