IMPALA-6661 Make NaN values equal for grouping purposes. Similar to the treatment of NULLs, we want to consider NaN values as equal when grouping.
- When detecting a NaN in a set of row values, the NaN value must be converted to a canonical value - so that all NaN values have the same bit-pattern for hashing purposes. - When doing equality evaluation, floating point types must have additional logic to consider NaN values as equal. - Existing logic for handling NULLs in this way is appropriate for triggering this behavior for NaN values. - Relabel "force null equality" as "inclusive equality" to expand the scope of the concept to a more generic form that includes NaN. Change-Id: I996c4a2e1934fd887046ed0c55457b7285375086 Reviewed-on: http://gerrit.cloudera.org:8080/11535 Tested-by: Impala Public Jenkins <[email protected]> Reviewed-by: Michael Ho <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/6684fe19 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/6684fe19 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/6684fe19 Branch: refs/heads/branch-3.1.0 Commit: 6684fe1900d1c5655f449cf551aa798c1cad3c17 Parents: cb2574b Author: Michal Ostrowski <[email protected]> Authored: Mon Sep 24 18:58:23 2018 -0700 Committer: Zoltan Borok-Nagy <[email protected]> Committed: Tue Nov 13 12:50:23 2018 +0100 ---------------------------------------------------------------------- be/src/codegen/codegen-anyval.cc | 42 ++++++++++++++++++-- be/src/codegen/codegen-anyval.h | 21 +++++++++- be/src/exec/hash-table.cc | 35 +++++++++++----- be/src/exec/hash-table.h | 35 ++++++++-------- be/src/exec/hash-table.inline.h | 4 +- be/src/runtime/raw-value.cc | 2 + be/src/runtime/raw-value.h | 13 ++++++ be/src/runtime/raw-value.inline.h | 25 ++++++++++++ .../queries/QueryTest/aggregation.test | 23 +++++++++++ .../queries/QueryTest/exprs.test | 10 +++++ .../queries/QueryTest/joins.test | 27 +++++++++++++ 11 files changed, 202 insertions(+), 35 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/codegen/codegen-anyval.cc ---------------------------------------------------------------------- diff --git a/be/src/codegen/codegen-anyval.cc b/be/src/codegen/codegen-anyval.cc index 3e2eb1e..a44eccc 100644 --- a/be/src/codegen/codegen-anyval.cc +++ b/be/src/codegen/codegen-anyval.cc @@ -18,7 +18,7 @@ #include "codegen/codegen-anyval.h" #include "codegen/codegen-util.h" - +#include "runtime/raw-value.h" #include "common/names.h" using namespace impala; @@ -466,6 +466,29 @@ void CodegenAnyVal::SetDate(llvm::Value* date) { value_ = builder_->CreateInsertValue(value_, v, 0, name_); } +void CodegenAnyVal::ConvertToCanonicalForm() { + // Convert the value to a bit pattern that is unambiguous. + // Specifically, for floating point type values, NaN values are converted to + // the same bit pattern. + switch(type_.type) { + case TYPE_FLOAT: + case TYPE_DOUBLE: { + llvm::Value* raw = GetVal(); + llvm::Value* canonical_val; + if (type_.type == TYPE_FLOAT) { + canonical_val = llvm::ConstantFP::getNaN(codegen_->float_type()); + } else { + canonical_val = llvm::ConstantFP::getNaN(codegen_->double_type()); + } + llvm::Value* is_nan = builder_->CreateFCmpUNO(raw, raw, "cmp_nan"); + SetVal(builder_->CreateSelect(is_nan, canonical_val, raw)); + break; + } + default: + ; + } +} + llvm::Value* CodegenAnyVal::GetLoweredPtr(const string& name) const { llvm::Value* lowered_ptr = codegen_->CreateEntryBlockAlloca(*builder_, value_->getType(), name.c_str()); @@ -702,7 +725,8 @@ llvm::Value* CodegenAnyVal::Eq(CodegenAnyVal* other) { } } -llvm::Value* CodegenAnyVal::EqToNativePtr(llvm::Value* native_ptr) { +llvm::Value* CodegenAnyVal::EqToNativePtr(llvm::Value* native_ptr, + bool inclusive_equality) { llvm::Value* val = NULL; if (!type_.IsStringType()) { val = builder_->CreateLoad(native_ptr); @@ -718,9 +742,19 @@ llvm::Value* CodegenAnyVal::EqToNativePtr(llvm::Value* native_ptr) { case TYPE_DECIMAL: return 
builder_->CreateICmpEQ(GetVal(), val, "cmp_raw"); case TYPE_FLOAT: - case TYPE_DOUBLE: + case TYPE_DOUBLE:{ // Use the ordering version "OEQ" to ensure that 'nan' != 'nan'. - return builder_->CreateFCmpOEQ(GetVal(), val, "cmp_raw"); + llvm::Value* local_val = GetVal(); + llvm::Value* cmp_raw = builder_->CreateFCmpOEQ(local_val, val, "cmp_raw"); + if (!inclusive_equality) return cmp_raw; + + // Mirror logic in HashTableCtx::Equals - IMPALA-6661 + llvm::Value* local_is_nan = builder_->CreateFCmpUNO(local_val, + local_val, "local_val_is_nan"); + llvm::Value* val_is_nan = builder_->CreateFCmpUNO(val, val, "val_is_nan"); + llvm::Value* both_nan = builder_->CreateAnd(local_is_nan, val_is_nan); + return builder_->CreateOr(cmp_raw, both_nan, "cmp_raw_with_nan"); + } case TYPE_STRING: case TYPE_VARCHAR: case TYPE_FIXED_UDA_INTERMEDIATE: { http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/codegen/codegen-anyval.h ---------------------------------------------------------------------- diff --git a/be/src/codegen/codegen-anyval.h b/be/src/codegen/codegen-anyval.h index 2bc800e..bf98fd9 100644 --- a/be/src/codegen/codegen-anyval.h +++ b/be/src/codegen/codegen-anyval.h @@ -226,14 +226,31 @@ class CodegenAnyVal { void WriteToSlot(const SlotDescriptor& slot_desc, llvm::Value* tuple, llvm::Value* pool_val, llvm::BasicBlock* insert_before = nullptr); + /// Rewrites the bit values of a value in a canonical form. + /// Floating point values may be "NaN". Nominally, NaN != NaN, but + /// for grouping purposes we want that to not be the case. + /// Therefore all NaN values need to be converted into a consistent + /// form where all bits are the same. This method will do that - + /// ensure that all NaN values have the same bit pattern. 
+ /// + /// Generically speaking, a canonical form of a value ensures that + /// all ambiguity is removed from a value's bit settings -- if there + /// are bits that can be freely changed without changing the logical + /// value of the value. (Currently this only has an impact for NaN + /// float and double values.) + void ConvertToCanonicalForm(); + /// Returns the i1 result of this == other. this and other must be non-null. llvm::Value* Eq(CodegenAnyVal* other); /// Compares this *Val to the value of 'native_ptr'. 'native_ptr' should be a pointer to /// a native type, e.g. StringValue, or TimestampValue. This *Val should match /// 'native_ptr's type (e.g. if this is an IntVal, 'native_ptr' should have type i32*). - /// Returns the i1 result of the equality comparison. - llvm::Value* EqToNativePtr(llvm::Value* native_ptr); + /// Returns the i1 result of the equality comparison. "inclusive_equality" means that + /// the scope of equality will be expanded to include considering as equal scenarios + /// that would otherwise resolve to not-equal, such as a comparison of floating-point + /// "NaN" values. + llvm::Value* EqToNativePtr(llvm::Value* native_ptr, bool inclusive_equality = false); /// Returns the i32 result of comparing this value to 'other' (similar to /// RawValue::Compare()). This and 'other' must be non-null. 
Return value is < 0 if http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/exec/hash-table.cc ---------------------------------------------------------------------- diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc index e315fd1..576deca 100644 --- a/be/src/exec/hash-table.cc +++ b/be/src/exec/hash-table.cc @@ -185,7 +185,7 @@ bool HashTableCtx::EvalRow(const TupleRow* row, bool has_null = false; for (int i = 0; i < evals.size(); ++i) { void* loc = expr_values_cache_.ExprValuePtr(expr_values, i); - void* val = evals[i]->GetValue(row); + const void* val = evals[i]->GetValue(row); if (val == NULL) { // If the table doesn't store nulls, no reason to keep evaluating if (!stores_nulls_) return true; @@ -195,8 +195,12 @@ bool HashTableCtx::EvalRow(const TupleRow* row, } else { expr_values_null[i] = false; } - DCHECK_LE(build_exprs_[i]->type().GetSlotSize(), sizeof(NULL_VALUE)); - RawValue::Write(val, loc, build_exprs_[i]->type(), NULL); + const ColumnType& expr_type = build_exprs_[i]->type(); + DCHECK_LE(expr_type.GetSlotSize(), sizeof(NULL_VALUE)); + if (RawValue::IsNaN(val, expr_type)) { + val = RawValue::CanonicalNaNValue(expr_type); + } + RawValue::Write(val, loc, expr_type, NULL); } return has_null; } @@ -230,13 +234,13 @@ uint32_t HashTableCtx::HashVariableLenRow(const uint8_t* expr_values, return hash; } -template <bool FORCE_NULL_EQUALITY> +template <bool INCLUSIVE_EQUALITY> bool HashTableCtx::Equals(const TupleRow* build_row, const uint8_t* expr_values, const uint8_t* expr_values_null) const noexcept { for (int i = 0; i < build_expr_evals_.size(); ++i) { void* val = build_expr_evals_[i]->GetValue(build_row); if (val == NULL) { - if (!(FORCE_NULL_EQUALITY || finds_nulls_[i])) return false; + if (!(INCLUSIVE_EQUALITY || finds_nulls_[i])) return false; if (!expr_values_null[i]) return false; continue; } else { @@ -245,7 +249,16 @@ bool HashTableCtx::Equals(const TupleRow* build_row, const uint8_t* expr_values, const void* loc = 
expr_values_cache_.ExprValuePtr(expr_values, i); DCHECK(build_exprs_[i] == &build_expr_evals_[i]->root()); - if (!RawValue::Eq(loc, val, build_exprs_[i]->type())) return false; + + const ColumnType& expr_type = build_exprs_[i]->type(); + if (!RawValue::Eq(loc, val, expr_type)) { + if (INCLUSIVE_EQUALITY) { + bool val_is_nan = RawValue::IsNaN(val, expr_type); + bool local_is_nan = RawValue::IsNaN(loc, expr_type); + if (val_is_nan && local_is_nan) continue; + } + return false; + } } return true; } @@ -786,6 +799,9 @@ Status HashTableCtx::CodegenEvalRow( // Not null block builder.SetInsertPoint(not_null_block); + + result.ConvertToCanonicalForm(); + result.StoreToNativePtr(llvm_loc); builder.CreateBr(continue_block); @@ -1059,7 +1075,7 @@ Status HashTableCtx::CodegenHashRow( // br i1 %cmp_raw10, label %continue3, label %false_block // } Status HashTableCtx::CodegenEquals( - LlvmCodeGen* codegen, bool force_null_equality, llvm::Function** fn) { + LlvmCodeGen* codegen, bool inclusive_equality, llvm::Function** fn) { for (int i = 0; i < build_exprs_.size(); ++i) { // Disable codegen for CHAR if (build_exprs_[i]->type().type == TYPE_CHAR) { @@ -1123,13 +1139,14 @@ Status HashTableCtx::CodegenEquals( // We consider null values equal if we are comparing build rows or if the join // predicate is <=> - if (force_null_equality || finds_nulls_[i]) { + if (inclusive_equality || finds_nulls_[i]) { llvm::Value* llvm_null_byte_loc = builder.CreateInBoundsGEP( NULL, expr_values_null, codegen->GetI32Constant(i), "null_byte_loc"); llvm::Value* null_byte = builder.CreateLoad(llvm_null_byte_loc); row_is_null = builder.CreateICmpNE(null_byte, codegen->GetI8Constant(0)); } + if (inclusive_equality) result.ConvertToCanonicalForm(); // Get llvm value for row_val from 'expr_values' int offset = expr_values_cache_.expr_values_offsets(i); @@ -1154,7 +1171,7 @@ Status HashTableCtx::CodegenEquals( builder.SetInsertPoint(cmp_block); } // Check result == row_val - llvm::Value* is_equal = 
result.EqToNativePtr(row_val); + llvm::Value* is_equal = result.EqToNativePtr(row_val, inclusive_equality); builder.CreateCondBr(is_equal, continue_block, false_block); builder.SetInsertPoint(continue_block); http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/exec/hash-table.h ---------------------------------------------------------------------- diff --git a/be/src/exec/hash-table.h b/be/src/exec/hash-table.h index c57b7a5..e98bdfd 100644 --- a/be/src/exec/hash-table.h +++ b/be/src/exec/hash-table.h @@ -165,10 +165,10 @@ class HashTableCtx { Status CodegenEvalRow(LlvmCodeGen* codegen, bool build_row, llvm::Function** fn); /// Codegen for evaluating a TupleRow and comparing equality. Function signature - /// matches HashTable::Equals(). 'force_null_equality' is true if the generated - /// equality function should treat all NULLs as equal. See the template parameter - /// to HashTable::Equals(). - Status CodegenEquals(LlvmCodeGen* codegen, bool force_null_equality, + /// matches HashTable::Equals(). 'inclusive_equality' is true if the generated + /// equality function should treat all NULLs as equal and all NaNs as equal. + /// See the template parameter to HashTable::Equals(). + Status CodegenEquals(LlvmCodeGen* codegen, bool inclusive_equality, llvm::Function** fn); /// Codegen for hashing expr values. Function prototype matches HashRow identically. @@ -431,11 +431,11 @@ class HashTableCtx { /// Wrapper function for calling correct HashUtil function in non-codegen'd case. uint32_t Hash(const void* input, int len, uint32_t hash) const; - /// Evaluate 'row' over build exprs, storing values into 'expr_values' and nullness into - /// 'expr_values_null'. This will be replaced by codegen. We do not want this function - /// inlined when cross compiled because we need to be able to differentiate between - /// EvalBuildRow and EvalProbeRow by name and the build/probe exprs are baked into the - /// codegen'd function. 
+ /// Evaluate 'row' over build exprs, storing values into 'expr_values' and nullness + /// into 'expr_values_null'. This will be replaced by codegen. We do not want this + /// function inlined when cross compiled because we need to be able to differentiate + /// between EvalBuildRow and EvalProbeRow by name and the build/probe exprs are baked + /// into the codegen'd function. bool IR_NO_INLINE EvalBuildRow( const TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) noexcept { return EvalRow(row, build_expr_evals_, expr_values, expr_values_null); @@ -460,18 +460,17 @@ class HashTableCtx { uint8_t* expr_values, uint8_t* expr_values_null) noexcept; /// Returns true if the values of build_exprs evaluated over 'build_row' equal the - /// values in 'expr_values' with nullness 'expr_values_null'. FORCE_NULL_EQUALITY is - /// true if all nulls should be treated as equal, regardless of the values of - /// 'finds_nulls_'. This will be replaced by codegen. - template <bool FORCE_NULL_EQUALITY> + /// values in 'expr_values' with nullness 'expr_values_null'. INCLUSIVE_EQUALITY + /// means "NULL==NULL" and "NaN==NaN". This will be replaced by codegen. + template <bool INCLUSIVE_EQUALITY> bool IR_NO_INLINE Equals(const TupleRow* build_row, const uint8_t* expr_values, const uint8_t* expr_values_null) const noexcept; /// Helper function that calls Equals() with the current row. Always inlined so that /// it does not appear in cross-compiled IR. - template <bool FORCE_NULL_EQUALITY> + template <bool INCLUSIVE_EQUALITY> bool ALWAYS_INLINE Equals(const TupleRow* build_row) const { - return Equals<FORCE_NULL_EQUALITY>(build_row, expr_values_cache_.cur_expr_values(), + return Equals<INCLUSIVE_EQUALITY>(build_row, expr_values_cache_.cur_expr_values(), expr_values_cache_.cur_expr_values_null()); } @@ -848,14 +847,14 @@ class HashTable { /// this function. The values of the expression values cache in 'ht_ctx' will be /// used to probe the hash table. 
/// - /// 'FORCE_NULL_EQUALITY' is true if NULLs should always be considered equal when - /// comparing two rows. + /// 'INCLUSIVE_EQUALITY' is true if NULLs and NaNs should always be + /// considered equal when comparing two rows. /// /// 'hash' is the hash computed by EvalAndHashBuild() or EvalAndHashProbe(). /// 'found' indicates that a bucket that contains an equal row is found. /// /// There are wrappers of this function that perform the Find and Insert logic. - template <bool FORCE_NULL_EQUALITY> + template <bool INCLUSIVE_EQUALITY> int64_t IR_ALWAYS_INLINE Probe(Bucket* buckets, int64_t num_buckets, HashTableCtx* ht_ctx, uint32_t hash, bool* found); http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/exec/hash-table.inline.h ---------------------------------------------------------------------- diff --git a/be/src/exec/hash-table.inline.h b/be/src/exec/hash-table.inline.h index 205abe7..2bcecef 100644 --- a/be/src/exec/hash-table.inline.h +++ b/be/src/exec/hash-table.inline.h @@ -48,7 +48,7 @@ inline void HashTableCtx::ExprValuesCache::NextRow() { DCHECK_LE(cur_expr_values_hash_ - expr_values_hash_array_.get(), capacity_); } -template <bool FORCE_NULL_EQUALITY> +template <bool INCLUSIVE_EQUALITY> inline int64_t HashTable::Probe(Bucket* buckets, int64_t num_buckets, HashTableCtx* ht_ctx, uint32_t hash, bool* found) { DCHECK(buckets != NULL); @@ -66,7 +66,7 @@ inline int64_t HashTable::Probe(Bucket* buckets, int64_t num_buckets, if (LIKELY(!bucket->filled)) return bucket_idx; if (hash == bucket->hash) { if (ht_ctx != NULL && - ht_ctx->Equals<FORCE_NULL_EQUALITY>(GetRow(bucket, ht_ctx->scratch_row_))) { + ht_ctx->Equals<INCLUSIVE_EQUALITY>(GetRow(bucket, ht_ctx->scratch_row_))) { *found = true; return bucket_idx; } http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/runtime/raw-value.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/raw-value.cc b/be/src/runtime/raw-value.cc index 
b5de7bc..f15986b 100644 --- a/be/src/runtime/raw-value.cc +++ b/be/src/runtime/raw-value.cc @@ -29,6 +29,8 @@ namespace impala { const int RawValue::ASCII_PRECISION = 16; // print 16 digits for double/float +const double RawValue::CANONICAL_DOUBLE_NAN = nan(""); +const float RawValue::CANONICAL_FLOAT_NAN = nanf(""); void RawValue::PrintValueAsBytes(const void* value, const ColumnType& type, stringstream* stream) { http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/runtime/raw-value.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/raw-value.h b/be/src/runtime/raw-value.h index 6ee9191..7bf52a5 100644 --- a/be/src/runtime/raw-value.h +++ b/be/src/runtime/raw-value.h @@ -37,6 +37,12 @@ class RawValue { /// Ascii output precision for double/float static const int ASCII_PRECISION; + /// Single NaN values to ensure all NaN values can be assigned one bit pattern + /// that will always compare and hash the same way. Allows for all NaN values + /// to be put into the same "group by" bucket. + static const double CANONICAL_DOUBLE_NAN; + static const float CANONICAL_FLOAT_NAN; + /// Convert 'value' into ascii and write to 'stream'. NULL turns into "NULL". 'scale' /// determines how many digits after the decimal are printed for floating point numbers, /// -1 indicates to use the stream's current formatting. @@ -97,6 +103,13 @@ class RawValue { /// This is more performant than Compare() == 0 for string equality, mostly because of /// the length comparison check. static inline bool Eq(const void* v1, const void* v2, const ColumnType& type); + + /// Returns true if val/type correspond to a NaN floating point value. + static inline bool IsNaN(const void* val, const ColumnType& type); + + /// Returns a canonical NaN value for a floating point type + /// (which will always have the same bit-pattern to maintain consistency in hashing). 
+ static inline const void* CanonicalNaNValue(const ColumnType& type); }; } http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/be/src/runtime/raw-value.inline.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/raw-value.inline.h b/be/src/runtime/raw-value.inline.h index 03c3699..9206cf4 100644 --- a/be/src/runtime/raw-value.inline.h +++ b/be/src/runtime/raw-value.inline.h @@ -39,6 +39,29 @@ namespace impala { static const uint32_t HASH_VAL_NULL = 0x58081667; static const uint32_t HASH_VAL_EMPTY = 0x7dca7eee; +inline bool RawValue::IsNaN(const void* val, const ColumnType& type) { + switch(type.type) { + case TYPE_FLOAT: + return std::isnan(*reinterpret_cast<const float*>(val)); + case TYPE_DOUBLE: + return std::isnan(*reinterpret_cast<const double*>(val)); + default: + return false; + } +} + +inline const void* RawValue::CanonicalNaNValue(const ColumnType& type) { + switch(type.type) { + case TYPE_FLOAT: + return &CANONICAL_FLOAT_NAN; + case TYPE_DOUBLE: + return &CANONICAL_DOUBLE_NAN; + default: + DCHECK(false); + return nullptr; + } +} + inline bool RawValue::Eq(const void* v1, const void* v2, const ColumnType& type) { const StringValue* string_value1; const StringValue* string_value2; @@ -144,6 +167,7 @@ inline uint32_t RawValue::GetHashValueNonNull<float>(const float* v, const ColumnType& type, uint32_t seed) { DCHECK_EQ(type.type, TYPE_FLOAT); DCHECK(v != NULL); + if (std::isnan(*v)) v = &RawValue::CANONICAL_FLOAT_NAN; return HashUtil::MurmurHash2_64(v, 4, seed); } @@ -152,6 +176,7 @@ inline uint32_t RawValue::GetHashValueNonNull<double>(const double* v, const ColumnType& type, uint32_t seed) { DCHECK_EQ(type.type, TYPE_DOUBLE); DCHECK(v != NULL); + if (std::isnan(*v)) v = &RawValue::CANONICAL_DOUBLE_NAN; return HashUtil::MurmurHash2_64(v, 8, seed); } http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/testdata/workloads/functional-query/queries/QueryTest/aggregation.test 
---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/aggregation.test b/testdata/workloads/functional-query/queries/QueryTest/aggregation.test index 7aba138..69372f8 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/aggregation.test +++ b/testdata/workloads/functional-query/queries/QueryTest/aggregation.test @@ -1378,3 +1378,26 @@ having extract(hour from timestamp_col) = int_col TIMESTAMP,BIGINT ---- RESULTS ==== +---- QUERY +# GROUP BY of NaN values aggregates NaN's as one grouping +select count(*), sqrt(0.5-x) as Z +from (VALUES((1.6 x, 2 y), (3.2, 4), (5.4,6))) T +group by Z +---- RESULTS +3, NaN +---- TYPES +bigint, double +==== +---- QUERY +# GROUP BY of NaN values aggregates NaN's as one grouping +select count(*), cast(sqrt(0.4-x) as FLOAT) as P, cast(sqrt(1.5-y) as FLOAT) as Q +from (VALUES((1.6 x, 1.6 y, 0 z), (0.5, 0.5, 0), (5.4, 6, 0), + (0.5, 0.5, 0), (0.5, 0.5, 0), (-0.6, 0.5, 0))) T +group by P, Q order by P, Q +---- RESULTS +2, NaN, NaN +3, NaN, 1 +1, 1, 1 +---- TYPES +bigint, float, float +==== \ No newline at end of file http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/testdata/workloads/functional-query/queries/QueryTest/exprs.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test index 9187b6a..8a25b85 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test @@ -3071,3 +3071,13 @@ where cast(get_json_object(t.json, '$.a.b.c') as int) > 1 ---- TYPES BIGINT ==== +---- QUERY +# IMPALA-6661 - NaN should not evaluate as the same as any other NaN via <=> +WITH W AS (SELECT T.*, CAST(SQRT(X) AS FLOAT) P, CAST(SQRT(Y) AS FLOAT) Q +FROM (VALUES((CAST(-1.0 AS FLOAT) X, CAST(-1.0 AS FLOAT) Y), 
(-1.0, 0), (0, -1.0), (0, 0))) T ) +SELECT * FROM W WHERE W.Q<=>W.P +---- RESULTS +0,0,0,0 +---- TYPES +FLOAT, FLOAT, FLOAT, FLOAT +==== http://git-wip-us.apache.org/repos/asf/impala/blob/6684fe19/testdata/workloads/functional-query/queries/QueryTest/joins.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/joins.test b/testdata/workloads/functional-query/queries/QueryTest/joins.test index 643c3b1..bbead6c 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/joins.test +++ b/testdata/workloads/functional-query/queries/QueryTest/joins.test @@ -795,3 +795,30 @@ where sqrt(-a.id) = b.float_col / b.double_col ---- TYPES INT ==== +---- QUERY +# Test that 'nan' != 'nan' when joining. +with t1 as (select cast(sqrt(0.5-x) as FLOAT) c1, y c2 from + (VALUES((1.6 x, 0 y), (3.2, 1), (5.4,2), (0.5, 3), (0.5, 4), (-0.5, 5))) XX), + t2 as (select * from t1) +select t1.c1,t2.c1 from t1 right outer join t2 on t1.c1 = t2.c1; +---- RESULTS +0,0 +0,0 +0,0 +0,0 +1,1 +NULL,NaN +NULL,NaN +NULL,NaN +---- TYPES +FLOAT,FLOAT +==== +---- QUERY +# Test that NaN <=> NaN evaluates to false in joins (no rows are expected) +select t1.float_col as v + from functional.alltypessmall t1, functional.alltypessmall t2 + where sqrt(0.5-t1.float_col) <=> sqrt(0.5-t2.float_col) and t1.float_col > 0.5 +---- RESULTS +---- TYPES +FLOAT +====
