IMPALA-5188: Add slot sorting in TupleDescriptor::LayoutEquals() The slot descriptor vectors are not guaranteed to be sorted on the slot index within a tuple. As a result, TupleDescriptor::LayoutEquals() sometimes returned a wrong result.
In this patch, we sort the vectors of slot descriptors on the slot index within the tuple before comparing the vectors. Testing: - ran EE tests locally. Change-Id: I426ad244678dbfe517262dfb7bbf4adc0247a35e Reviewed-on: http://gerrit.cloudera.org:8080/6610 Reviewed-by: Dan Hecht <[email protected]> Reviewed-by: Alex Behm <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/50e3abdc Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/50e3abdc Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/50e3abdc Branch: refs/heads/master Commit: 50e3abdc3daf89c76d0d40e7f8c926477c2a1db4 Parents: 920641f Author: Taras Bobrovytsky <[email protected]> Authored: Tue Apr 11 14:07:19 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Thu May 4 02:04:03 2017 +0000 ---------------------------------------------------------------------- be/src/runtime/descriptors.cc | 20 ++++++++++++------- be/src/runtime/descriptors.h | 6 +++++- .../apache/impala/analysis/TupleDescriptor.java | 11 ++++++++++ .../impala/planner/SingleNodePlanner.java | 1 + .../queries/QueryTest/union.test | 21 ++++++++++++++++++++ 5 files changed, 51 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/50e3abdc/be/src/runtime/descriptors.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/descriptors.cc b/be/src/runtime/descriptors.cc index 52219e6..709ddb1 100644 --- a/be/src/runtime/descriptors.cc +++ b/be/src/runtime/descriptors.cc @@ -341,10 +341,11 @@ string TupleDescriptor::DebugString() const { } bool TupleDescriptor::LayoutEquals(const TupleDescriptor& other_desc) const { - const vector<SlotDescriptor*>& slots = this->slots(); - const vector<SlotDescriptor*>& other_slots = other_desc.slots(); - if (this->byte_size() != other_desc.byte_size()) return false; - if (slots.size() != other_slots.size()) return false; + if (byte_size() != other_desc.byte_size()) return false; + if (slots().size() != other_desc.slots().size()) return false; + + vector<SlotDescriptor*> slots = SlotsOrderedByIdx(); + vector<SlotDescriptor*> other_slots = other_desc.SlotsOrderedByIdx(); for (int i = 0; i < slots.size(); ++i) { if (!slots[i]->LayoutEquals(*other_slots[i])) return false; } @@ -672,10 +673,15 @@ llvm::Value* SlotDescriptor::CodegenGetNullByte( return builder->CreateLoad(byte_ptr, "null_byte"); } +vector<SlotDescriptor*> TupleDescriptor::SlotsOrderedByIdx() const { + vector<SlotDescriptor*> sorted_slots(slots().size()); + for (SlotDescriptor* slot: slots()) sorted_slots[slot->slot_idx_] = slot; + return sorted_slots; +} + llvm::StructType* TupleDescriptor::GetLlvmStruct(LlvmCodeGen* codegen) const { - // Sort slots in the order they will appear in LLVM struct. - vector<SlotDescriptor*> sorted_slots(slots_.size()); - for (SlotDescriptor* slot: slots_) sorted_slots[slot->slot_idx_] = slot; + // Get slots in the order they will appear in LLVM struct. + vector<SlotDescriptor*> sorted_slots = SlotsOrderedByIdx(); // Add the slot types to the struct description. vector<llvm::Type*> struct_fields; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/50e3abdc/be/src/runtime/descriptors.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index 87cce64..26f58d3 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -432,7 +432,8 @@ class TupleDescriptor { const int num_null_bytes_; const int null_bytes_offset_; - /// Contains all slots. + /// Contains all slots. Slots are in the same order as the expressions that materialize + /// them. See Tuple::MaterializeExprs(). std::vector<SlotDescriptor*> slots_; /// Contains only materialized string slots. @@ -452,6 +453,9 @@ class TupleDescriptor { TupleDescriptor(const TTupleDescriptor& tdesc); void AddSlot(SlotDescriptor* slot); + + /// Returns slots in their physical order. + vector<SlotDescriptor*> SlotsOrderedByIdx() const; }; class DescriptorTbl { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/50e3abdc/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java index 6ec2d26..de9dc98 100644 --- a/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java +++ b/fe/src/main/java/org/apache/impala/analysis/TupleDescriptor.java @@ -231,11 +231,22 @@ public class TupleDescriptor { return ttupleDesc; } + /** + * In some cases changes are made to a tuple after the memory layout has been computed. + * This function allows us to recompute the memory layout. + */ + public void recomputeMemLayout() { + Preconditions.checkState(hasMemLayout_); + hasMemLayout_ = false; + computeMemLayout(); + } + public void computeMemLayout() { if (hasMemLayout_) return; hasMemLayout_ = true; boolean alwaysAddNullBit = hasNullableKuduScanSlots(); + avgSerializedSize_ = 0; // maps from slot size to slot descriptors with that size Map<Integer, List<SlotDescriptor>> slotsBySize = http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/50e3abdc/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java index af6eb93..eb96cdc 100644 --- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java @@ -237,6 +237,7 @@ public class SingleNodePlanner { Preconditions.checkState(collExpr instanceof SlotRef); SlotRef collSlotRef = (SlotRef) collExpr; collSlotRef.getDesc().setIsMaterialized(false); + collSlotRef.getDesc().getParent().recomputeMemLayout(); } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/50e3abdc/testdata/workloads/functional-query/queries/QueryTest/union.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/union.test b/testdata/workloads/functional-query/queries/QueryTest/union.test index 5783c24..7fe4020 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/union.test +++ b/testdata/workloads/functional-query/queries/QueryTest/union.test @@ -1150,3 +1150,24 @@ select count(b) from ( ---- TYPES bigint ===== +---- QUERY +# IMPALA-5188: The slot descriptors might be ordered differently for the operands and the +# union tuple. (Regression test). +select 1, 1 +union all +select avg(id), id +from alltypestiny +group by id +---- RESULTS +0,0 +1,1 +1,1 +2,2 +3,3 +4,4 +5,5 +6,6 +7,7 +---- TYPES +double, int +=====
