Repository: impala Updated Branches: refs/heads/master 5e2dcd25d -> 07b8aaf6c
IMPALA-7851: fix underflow in ReserveSpace() The (num_rows - start_idx) calculation was incorrect because num_rows is *not* inclusive of start_idx - it is the number of rows after start_idx to add to the output batch rather than one past the last row to add. (run_rows - start_idx) could then be negative, which results in an underflow when it is implicitly converted to uint32_t. The underflow could result in reserving huge amounts of memory in the vector, which resulted in various misbehaviour such as hangs. Testing: Looped the test under ASAN. Before the fix it hung reliably. After it succeeds quickly. Change-Id: Iaec944f2149a6b9b605a5a885357fd54754dc046 Reviewed-on: http://gerrit.cloudera.org:8080/11976 Reviewed-by: Thomas Marshall <[email protected]> Tested-by: Tim Armstrong <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/07b8aaf6 Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/07b8aaf6 Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/07b8aaf6 Branch: refs/heads/master Commit: 07b8aaf6c57b6050e81def05561fcc37dba77052 Parents: 5e2dcd2 Author: Tim Armstrong <[email protected]> Authored: Wed Nov 21 09:21:38 2018 -0800 Committer: Tim Armstrong <[email protected]> Committed: Wed Nov 21 23:31:56 2018 +0000 ---------------------------------------------------------------------- be/src/service/hs2-util.cc | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/07b8aaf6/be/src/service/hs2-util.cc ---------------------------------------------------------------------- diff --git a/be/src/service/hs2-util.cc b/be/src/service/hs2-util.cc index 66a76bd..65510ee 100644 --- a/be/src/service/hs2-util.cc +++ b/be/src/service/hs2-util.cc @@ -128,8 +128,9 @@ void impala::TColumnValueToHS2TColumn(const TColumnValue& col_val, // Helper to reserve space in hs2Vals->values and hs2Vals->nulls for the values that the // different implementations of ExprValuesToHS2TColumn will write. template <typename T> -void ReserveSpace(int start_idx, int num_rows, uint32_t output_row_idx, T* hs2Vals) { - int64_t num_output_rows = output_row_idx + num_rows - start_idx; +void ReserveSpace(int num_rows, uint32_t output_row_idx, T* hs2Vals) { + DCHECK_GE(num_rows, 0); + int64_t num_output_rows = output_row_idx + num_rows; int64_t num_null_bytes = BitUtil::RoundUpNumBytes(num_output_rows); // Round up reserve() arguments to power-of-two to avoid accidentally quadratic // behaviour from repeated small increases in size. @@ -141,7 +142,7 @@ void ReserveSpace(int start_idx, int num_rows, uint32_t output_row_idx, T* hs2Va static void BoolExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) { - ReserveSpace(start_idx, num_rows, output_row_idx, &column->boolVal); + ReserveSpace(num_rows, output_row_idx, &column->boolVal); FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) { BooleanVal val = expr_eval->GetBooleanVal(it.Get()); column->boolVal.values.push_back(val.val); @@ -154,7 +155,7 @@ static void BoolExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* static void TinyIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) { - ReserveSpace(start_idx, num_rows, output_row_idx, &column->byteVal); + ReserveSpace(num_rows, output_row_idx, &column->byteVal); FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) { TinyIntVal val = expr_eval->GetTinyIntVal(it.Get()); column->byteVal.values.push_back(val.val); @@ -167,7 +168,7 @@ static void TinyIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBat static void SmallIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) { - ReserveSpace(start_idx, num_rows, output_row_idx, &column->i16Val); + ReserveSpace(num_rows, output_row_idx, &column->i16Val); FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) { SmallIntVal val = expr_eval->GetSmallIntVal(it.Get()); column->i16Val.values.push_back(val.val); @@ -180,8 +181,9 @@ static void SmallIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, static void IntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) { - ReserveSpace(start_idx, num_rows, output_row_idx, &column->i32Val); + ReserveSpace(num_rows, output_row_idx, &column->i32Val); FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) { + DCHECK_EQ(output_row_idx, column->i32Val.values.size()); IntVal val = expr_eval->GetIntVal(it.Get()); column->i32Val.values.push_back(val.val); SetNullBit(output_row_idx, val.is_null, &column->i32Val.nulls); @@ -193,7 +195,7 @@ static void IntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* static void BigIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) { - ReserveSpace(start_idx, num_rows, output_row_idx, &column->i64Val); + ReserveSpace(num_rows, output_row_idx, &column->i64Val); FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) { BigIntVal val = expr_eval->GetBigIntVal(it.Get()); column->i64Val.values.push_back(val.val); @@ -206,7 +208,7 @@ static void BigIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatc static void FloatExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) { - ReserveSpace(start_idx, num_rows, output_row_idx, &column->doubleVal); + ReserveSpace(num_rows, output_row_idx, &column->doubleVal); FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) { FloatVal val = expr_eval->GetFloatVal(it.Get()); column->doubleVal.values.push_back(val.val); @@ -219,7 +221,7 @@ static void FloatExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch static void DoubleExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) { - ReserveSpace(start_idx, num_rows, output_row_idx, &column->doubleVal); + ReserveSpace(num_rows, output_row_idx, &column->doubleVal); FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) { DoubleVal val = expr_eval->GetDoubleVal(it.Get()); column->doubleVal.values.push_back(val.val); @@ -232,7 +234,7 @@ static void DoubleExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatc static void TimestampExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) { - ReserveSpace(start_idx, num_rows, output_row_idx, &column->stringVal); + ReserveSpace(num_rows, output_row_idx, &column->stringVal); FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) { TimestampVal val = expr_eval->GetTimestampVal(it.Get()); column->stringVal.values.emplace_back(); @@ -250,7 +252,7 @@ static void TimestampExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, static void StringExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) { - ReserveSpace(start_idx, num_rows, output_row_idx, &column->stringVal); + ReserveSpace(num_rows, output_row_idx, &column->stringVal); FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) { StringVal val = expr_eval->GetStringVal(it.Get()); if (val.is_null) { @@ -267,7 +269,7 @@ static void StringExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatc static void CharExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, const TColumnType& type, RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) { - ReserveSpace(start_idx, num_rows, output_row_idx, &column->stringVal); + ReserveSpace(num_rows, output_row_idx, &column->stringVal); ColumnType char_type = ColumnType::CreateCharType(type.types[0].scalar_type.len); FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) { StringVal val = expr_eval->GetStringVal(it.Get()); @@ -285,7 +287,7 @@ static void CharExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, static void DecimalExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, const TColumnType& type, RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* column) { - ReserveSpace(start_idx, num_rows, output_row_idx, &column->stringVal); + ReserveSpace(num_rows, output_row_idx, &column->stringVal); FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) { DecimalVal val = expr_eval->GetDecimalVal(it.Get()); const ColumnType& decimalType = ColumnType::FromThrift(type);
