Repository: impala
Updated Branches:
  refs/heads/master 5e2dcd25d -> 07b8aaf6c


IMPALA-7851: fix underflow in ReserveSpace()

The (num_rows - start_idx) calculation was incorrect because
num_rows is *not* inclusive of start_idx - it is the number
of rows after start_idx to add to the output batch rather than
one past the last row to add.

(run_rows - start_idx) could then be negative, which results in
an underflow when it is implicitly converted to uint32_t.

The underflow could result in reserving huge amounts of memory in the
vector, which resulted in various misbehaviour such as hangs.

Testing:
Looped the test under ASAN. Before the fix it hung reliably. After it
succeeds quickly.

Change-Id: Iaec944f2149a6b9b605a5a885357fd54754dc046
Reviewed-on: http://gerrit.cloudera.org:8080/11976
Reviewed-by: Thomas Marshall <[email protected]>
Tested-by: Tim Armstrong <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/07b8aaf6
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/07b8aaf6
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/07b8aaf6

Branch: refs/heads/master
Commit: 07b8aaf6c57b6050e81def05561fcc37dba77052
Parents: 5e2dcd2
Author: Tim Armstrong <[email protected]>
Authored: Wed Nov 21 09:21:38 2018 -0800
Committer: Tim Armstrong <[email protected]>
Committed: Wed Nov 21 23:31:56 2018 +0000

----------------------------------------------------------------------
 be/src/service/hs2-util.cc | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/07b8aaf6/be/src/service/hs2-util.cc
----------------------------------------------------------------------
diff --git a/be/src/service/hs2-util.cc b/be/src/service/hs2-util.cc
index 66a76bd..65510ee 100644
--- a/be/src/service/hs2-util.cc
+++ b/be/src/service/hs2-util.cc
@@ -128,8 +128,9 @@ void impala::TColumnValueToHS2TColumn(const TColumnValue& 
col_val,
 // Helper to reserve space in hs2Vals->values and hs2Vals->nulls for the 
values that the
 // different implementations of ExprValuesToHS2TColumn will write.
 template <typename T>
-void ReserveSpace(int start_idx, int num_rows, uint32_t output_row_idx, T* 
hs2Vals) {
-  int64_t num_output_rows = output_row_idx + num_rows - start_idx;
+void ReserveSpace(int num_rows, uint32_t output_row_idx, T* hs2Vals) {
+  DCHECK_GE(num_rows, 0);
+  int64_t num_output_rows = output_row_idx + num_rows;
   int64_t num_null_bytes = BitUtil::RoundUpNumBytes(num_output_rows);
   // Round up reserve() arguments to power-of-two to avoid accidentally 
quadratic
   // behaviour from repeated small increases in size.
@@ -141,7 +142,7 @@ void ReserveSpace(int start_idx, int num_rows, uint32_t 
output_row_idx, T* hs2Va
 static void BoolExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, 
RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
-  ReserveSpace(start_idx, num_rows, output_row_idx, &column->boolVal);
+  ReserveSpace(num_rows, output_row_idx, &column->boolVal);
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     BooleanVal val = expr_eval->GetBooleanVal(it.Get());
     column->boolVal.values.push_back(val.val);
@@ -154,7 +155,7 @@ static void BoolExprValuesToHS2TColumn(ScalarExprEvaluator* 
expr_eval, RowBatch*
 static void TinyIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, 
RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
-  ReserveSpace(start_idx, num_rows, output_row_idx, &column->byteVal);
+  ReserveSpace(num_rows, output_row_idx, &column->byteVal);
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     TinyIntVal val = expr_eval->GetTinyIntVal(it.Get());
     column->byteVal.values.push_back(val.val);
@@ -167,7 +168,7 @@ static void 
TinyIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBat
 static void SmallIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
-  ReserveSpace(start_idx, num_rows, output_row_idx, &column->i16Val);
+  ReserveSpace(num_rows, output_row_idx, &column->i16Val);
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     SmallIntVal val = expr_eval->GetSmallIntVal(it.Get());
     column->i16Val.values.push_back(val.val);
@@ -180,8 +181,9 @@ static void 
SmallIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
 static void IntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, 
RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
-  ReserveSpace(start_idx, num_rows, output_row_idx, &column->i32Val);
+  ReserveSpace(num_rows, output_row_idx, &column->i32Val);
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
+    DCHECK_EQ(output_row_idx, column->i32Val.values.size());
     IntVal val = expr_eval->GetIntVal(it.Get());
     column->i32Val.values.push_back(val.val);
     SetNullBit(output_row_idx, val.is_null, &column->i32Val.nulls);
@@ -193,7 +195,7 @@ static void IntExprValuesToHS2TColumn(ScalarExprEvaluator* 
expr_eval, RowBatch*
 static void BigIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, 
RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
-  ReserveSpace(start_idx, num_rows, output_row_idx, &column->i64Val);
+  ReserveSpace(num_rows, output_row_idx, &column->i64Val);
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     BigIntVal val = expr_eval->GetBigIntVal(it.Get());
     column->i64Val.values.push_back(val.val);
@@ -206,7 +208,7 @@ static void 
BigIntExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatc
 static void FloatExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, 
RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
-  ReserveSpace(start_idx, num_rows, output_row_idx, &column->doubleVal);
+  ReserveSpace(num_rows, output_row_idx, &column->doubleVal);
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     FloatVal val = expr_eval->GetFloatVal(it.Get());
     column->doubleVal.values.push_back(val.val);
@@ -219,7 +221,7 @@ static void 
FloatExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch
 static void DoubleExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, 
RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
-  ReserveSpace(start_idx, num_rows, output_row_idx, &column->doubleVal);
+  ReserveSpace(num_rows, output_row_idx, &column->doubleVal);
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     DoubleVal val = expr_eval->GetDoubleVal(it.Get());
     column->doubleVal.values.push_back(val.val);
@@ -232,7 +234,7 @@ static void 
DoubleExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatc
 static void TimestampExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     RowBatch* batch, int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
-  ReserveSpace(start_idx, num_rows, output_row_idx, &column->stringVal);
+  ReserveSpace(num_rows, output_row_idx, &column->stringVal);
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     TimestampVal val = expr_eval->GetTimestampVal(it.Get());
     column->stringVal.values.emplace_back();
@@ -250,7 +252,7 @@ static void 
TimestampExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
 static void StringExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, 
RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
     apache::hive::service::cli::thrift::TColumn* column) {
-  ReserveSpace(start_idx, num_rows, output_row_idx, &column->stringVal);
+  ReserveSpace(num_rows, output_row_idx, &column->stringVal);
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     StringVal val = expr_eval->GetStringVal(it.Get());
     if (val.is_null) {
@@ -267,7 +269,7 @@ static void 
StringExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatc
 static void CharExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
     uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* 
column) {
-  ReserveSpace(start_idx, num_rows, output_row_idx, &column->stringVal);
+  ReserveSpace(num_rows, output_row_idx, &column->stringVal);
   ColumnType char_type = 
ColumnType::CreateCharType(type.types[0].scalar_type.len);
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     StringVal val = expr_eval->GetStringVal(it.Get());
@@ -285,7 +287,7 @@ static void CharExprValuesToHS2TColumn(ScalarExprEvaluator* 
expr_eval,
 static void DecimalExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
     const TColumnType& type, RowBatch* batch, int start_idx, int num_rows,
     uint32_t output_row_idx, apache::hive::service::cli::thrift::TColumn* 
column) {
-  ReserveSpace(start_idx, num_rows, output_row_idx, &column->stringVal);
+  ReserveSpace(num_rows, output_row_idx, &column->stringVal);
   FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
     DecimalVal val = expr_eval->GetDecimalVal(it.Get());
     const ColumnType& decimalType = ColumnType::FromThrift(type);

Reply via email to