Repository: incubator-impala
Updated Branches:
  refs/heads/master 7fc31b534 -> 4918b20ac


IMPALA-4408: Omit null bytes for Kudu scans with no nullable slots.

Kudu does not allocate null bytes if all projected columns are
non-nullable. Otherwise, Kudu allocates a null bit for all columns,
even the non-nullable ones. The bug was that Impala's memory layout
did not match the first requirement.

Change-Id: I762ad9d5cc4198922ea4b5218c504fde355c49a5
Reviewed-on: http://gerrit.cloudera.org:8080/4892
Reviewed-by: Tim Armstrong <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/4918b20a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/4918b20a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/4918b20a

Branch: refs/heads/master
Commit: 4918b20ac0a46d5dbc4455f0bb3e39eabe7f1cbd
Parents: 7fc31b5
Author: Alex Behm <[email protected]>
Authored: Mon Oct 31 14:29:42 2016 -0700
Committer: Internal Jenkins <[email protected]>
Committed: Tue Nov 1 01:47:30 2016 +0000

----------------------------------------------------------------------
 .../apache/impala/analysis/SlotDescriptor.java  |  1 -
 .../apache/impala/analysis/TupleDescriptor.java | 26 ++++++++++++++++----
 .../queries/QueryTest/kudu-scan-node.test       |  8 ++++++
 3 files changed, 29 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4918b20a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java 
b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
index 9a9c058..109cfa2 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
@@ -142,7 +142,6 @@ public class SlotDescriptor {
 
   public Path getPath() { return path_; }
   public boolean isScanSlot() { return path_ != null && 
path_.isRootedAtTable(); }
-  public boolean isKuduScanSlot() { return getColumn() instanceof KuduColumn; }
   public Column getColumn() { return !isScanSlot() ? null : 
path_.destColumn(); }
 
   public ColumnStats getStats() {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4918b20a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java 
b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
index bf6b93a..8dab33a 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java
@@ -27,6 +27,7 @@ import java.util.Map;
 import org.apache.commons.lang.StringUtils;
 import org.apache.impala.catalog.ColumnStats;
 import org.apache.impala.catalog.HdfsTable;
+import org.apache.impala.catalog.KuduTable;
 import org.apache.impala.catalog.StructType;
 import org.apache.impala.catalog.Table;
 import org.apache.impala.thrift.TTupleDescriptor;
@@ -59,9 +60,11 @@ import com.google.common.collect.Lists;
  *
  * Memory Layout
  * Slots are placed in descending order by size with trailing bytes to store 
null flags.
- * Null flags are omitted for non-nullable slots, except for Kudu scan slots 
which always
- * have a null flag to match Kudu's client row format. There is no padding 
between tuples
- * when stored back-to-back in a row batch.
+ * Null flags are omitted for non-nullable slots, with the following 
exceptions for Kudu
+ * scan tuples to match Kudu's client row format: If there is at least one 
nullable Kudu
+ * scan slot, then all slots (even non-nullable ones) get a null flag. If 
there are no
+ * nullable Kudu scan slots, then there are also no null flags.
+ * There is no padding between tuples when stored back-to-back in a row batch.
  *
  * Example: select bool_col, int_col, string_col, smallint_col from 
functional.alltypes
  * Slots:   string_col|int_col|smallint_col|bool_col|null_byte
@@ -232,6 +235,8 @@ public class TupleDescriptor {
     if (hasMemLayout_) return;
     hasMemLayout_ = true;
 
+    boolean alwaysAddNullBit = hasNullableKuduScanSlots();
+
     // maps from slot size to slot descriptors with that size
     Map<Integer, List<SlotDescriptor>> slotsBySize =
         new HashMap<Integer, List<SlotDescriptor>>();
@@ -253,7 +258,7 @@ public class TupleDescriptor {
       }
       totalSlotSize += d.getType().getSlotSize();
       slotsBySize.get(d.getType().getSlotSize()).add(d);
-      if (d.getIsNullable() || d.isKuduScanSlot()) ++numNullBits;
+      if (d.getIsNullable() || alwaysAddNullBit) ++numNullBits;
     }
     // we shouldn't have anything of size <= 0
     Preconditions.checkState(!slotsBySize.containsKey(0));
@@ -280,7 +285,7 @@ public class TupleDescriptor {
         slotOffset += slotSize;
 
         // assign null indicator
-        if (d.getIsNullable() || d.isKuduScanSlot()) {
+        if (d.getIsNullable() || alwaysAddNullBit) {
           d.setNullIndicatorByte(nullIndicatorByte);
           d.setNullIndicatorBit(nullIndicatorBit);
           nullIndicatorBit = (nullIndicatorBit + 1) % 8;
@@ -301,6 +306,17 @@ public class TupleDescriptor {
   }
 
   /**
+   * Returns true if this tuple has at least one materialized nullable Kudu 
scan slot.
+   */
+  private boolean hasNullableKuduScanSlots() {
+    if (!(getTable() instanceof KuduTable)) return false;
+    for (SlotDescriptor d: slots_) {
+      if (d.isMaterialized() && d.getIsNullable()) return true;
+    }
+    return false;
+  }
+
+  /**
    * Return true if the slots being materialized are all partition columns.
    */
   public boolean hasClusteringColsOnly() {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/4918b20a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test 
b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
index 494bb58..1017658 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
@@ -47,3 +47,11 @@ SELECT * FROM impala_2635_t1 UNION ALL SELECT * FROM 
impala_2635_t2;
 ---- TYPES
 BIGINT, STRING
 ====
+---- QUERY
+# IMPALA-4408: Test Kudu scans where all materialized slots are non-nullable.
+select count(int_col) from functional_kudu.tinyinttable
+---- RESULTS
+10
+---- TYPES
+BIGINT
+====

Reply via email to