Repository: kudu
Updated Branches:
  refs/heads/master 13ea03146 -> 889a04ad9


KUDU-1893 Ensure evaluation of added columns

During a normal scan, a CFileIterator sets a flag to indicate whether
the block supports decoder-level evaluation, and if, after returning,
this flag has not been set to false, the returned data will be
evaluated against the predicate.

Columns that are added after table creation and after a flush will use
DefaultColumnValueIterators to scan. These iterators were not setting
the flag, so the predicates would not be evaluated on these columns.
All rows in the existing results set would be returned, regardless of
predicates on added columns.

If a column predicate is added to an added column that should filter
out some rows, the scan will yield incorrect results. There is added
test coverage in all_types-scan-correctness-test.cc. Without the
changes in this patch, RunTests() will fail at the following points:
* Null default, Range+IsNull
* Null default, Range+IsNotNull
* Null default, Range
* Non-null default, Range+IsNull
* Non-null default, Range+IsNotNull
* Non-null default, Range with Default
* Non-null default, Range without Default

This patch addresses this by forcing predicate evaluation by the
DefaultColumnValueIterator.

Change-Id: Ic10919962d11effbb1b66d97660acd012b6b4be9
Reviewed-on: http://gerrit.cloudera.org:8080/6129
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/3907340a
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/3907340a
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/3907340a

Branch: refs/heads/master
Commit: 3907340a28bad494cc28a0c93431372a811159d0
Parents: 13ea031
Author: Andrew Wong <[email protected]>
Authored: Tue Feb 28 16:30:32 2017 -0800
Committer: David Ribeiro Alves <[email protected]>
Committed: Thu Mar 2 17:22:55 2017 +0000

----------------------------------------------------------------------
 src/kudu/cfile/cfile_reader.cc                  |  14 +
 src/kudu/common/column_predicate.cc             |  20 +-
 src/kudu/common/column_predicate.h              |   4 +
 .../tablet/all_types-scan-correctness-test.cc   | 388 ++++++++++++++++---
 4 files changed, 368 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/3907340a/src/kudu/cfile/cfile_reader.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile_reader.cc b/src/kudu/cfile/cfile_reader.cc
index d95b618..0b1bfff 100644
--- a/src/kudu/cfile/cfile_reader.cc
+++ b/src/kudu/cfile/cfile_reader.cc
@@ -499,7 +499,16 @@ Status 
DefaultColumnValueIterator::Scan(ColumnMaterializationContext* ctx) {
     ColumnDataView dst_view(dst);
     dst_view.SetNullBits(dst->nrows(), value_ != nullptr);
   }
+  // Cases where the selection vector can be cleared:
+  // 1. There is a read_default and it does not satisfy the predicate
+  // 2. There is no read_default and the predicate is searching for NULLs
   if (value_ != nullptr) {
+    if (ctx->DecoderEvalNotDisabled() &&
+        !ctx->pred()->EvaluateCell(typeinfo_->physical_type(), value_)) {
+      SelectionVectorView sel_view(ctx->sel());
+      sel_view.ClearBits(dst->nrows());
+      return Status::OK();
+    }
     if (typeinfo_->physical_type() == BINARY) {
       const Slice *src_slice = reinterpret_cast<const Slice *>(value_);
       Slice dst_slice;
@@ -514,6 +523,11 @@ Status 
DefaultColumnValueIterator::Scan(ColumnMaterializationContext* ctx) {
         dst->SetCellValue(i, value_);
       }
     }
+  } else {
+    if (ctx->DecoderEvalNotDisabled() && !ctx->EvaluatingIsNull()) {
+      SelectionVectorView sel_view(ctx->sel());
+      sel_view.ClearBits(dst->nrows());
+    }
   }
   return Status::OK();
 }

http://git-wip-us.apache.org/repos/asf/kudu/blob/3907340a/src/kudu/common/column_predicate.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate.cc 
b/src/kudu/common/column_predicate.cc
index 3b80104..2131b4e 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -582,6 +582,24 @@ void ColumnPredicate::EvaluateForPhysicalType(const 
ColumnBlock& block,
   LOG(FATAL) << "unknown predicate type";
 }
 
+bool ColumnPredicate::EvaluateCell(DataType type, const void* cell) const {
+  switch (type) {
+    case BOOL: return EvaluateCell<BOOL>(cell);
+    case INT8: return EvaluateCell<INT8>(cell);
+    case INT16: return EvaluateCell<INT16>(cell);
+    case INT32: return EvaluateCell<INT32>(cell);
+    case INT64: return EvaluateCell<INT64>(cell);
+    case UINT8: return EvaluateCell<UINT8>(cell);
+    case UINT16: return EvaluateCell<UINT16>(cell);
+    case UINT32: return EvaluateCell<UINT32>(cell);
+    case UINT64: return EvaluateCell<UINT64>(cell);
+    case FLOAT: return EvaluateCell<FLOAT>(cell);
+    case DOUBLE: return EvaluateCell<DOUBLE>(cell);
+    case BINARY: return EvaluateCell<BINARY>(cell);
+    default: LOG(FATAL) << "unknown physical type: " << 
GetTypeInfo(type)->name();
+  }
+}
+
 void ColumnPredicate::Evaluate(const ColumnBlock& block, SelectionVector* sel) 
const {
   DCHECK(sel);
   switch (block.type_info()->physical_type()) {
@@ -597,7 +615,7 @@ void ColumnPredicate::Evaluate(const ColumnBlock& block, 
SelectionVector* sel) c
     case FLOAT: return EvaluateForPhysicalType<FLOAT>(block, sel);
     case DOUBLE: return EvaluateForPhysicalType<DOUBLE>(block, sel);
     case BINARY: return EvaluateForPhysicalType<BINARY>(block, sel);
-    default: LOG(FATAL) << "unknown physical type: " << 
block.type_info()->physical_type();
+    default: LOG(FATAL) << "unknown physical type: " << 
block.type_info()->name();
   }
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/3907340a/src/kudu/common/column_predicate.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate.h 
b/src/kudu/common/column_predicate.h
index d28f9b4..6248a68 100644
--- a/src/kudu/common/column_predicate.h
+++ b/src/kudu/common/column_predicate.h
@@ -196,6 +196,10 @@ class ColumnPredicate {
     LOG(FATAL) << "unknown predicate type";
   }
 
+  // Evaluate the predicate on a single cell. Used if the physical type is 
only known at run-time.
+  // Otherwise, use EvaluateCell<DataType>.
+  bool EvaluateCell(DataType type, const void* cell) const;
+
   // Print the predicate for debugging.
   std::string ToString() const;
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/3907340a/src/kudu/tablet/all_types-scan-correctness-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/all_types-scan-correctness-test.cc 
b/src/kudu/tablet/all_types-scan-correctness-test.cc
index 34896de..2bce777 100644
--- a/src/kudu/tablet/all_types-scan-correctness-test.cc
+++ b/src/kudu/tablet/all_types-scan-correctness-test.cc
@@ -9,8 +9,32 @@
 namespace kudu {
 namespace tablet {
 
+// Column numbers; kColX refers to column with name "val_x".
+static const int kColA = 1;
+static const int kColB = 2;
+static const int kColC = 3;
+
+// Rows added after altering and before altering the table.
+static const int kNumAddedRows = 2000;
+static const int kNumBaseRows = 10000;
+
+// Number of unique values in each column.
+static const int kCardinality = 21;
+
+// Lower and upper bounds of the predicates.
+static const int kLower = 3;
+static const int kUpper = 10;
+
+// Number of elements to allocate memory for. Each range predicate requires 
two elements, and each
+// call to AddColumn(read_default) requires one. Must be greater than the 
number of elements
+// actually needed.
+static const int kNumAllocatedElements = 32;
+
+// Strings and binaries will have the following string length.
+static const int kStrlen = 10;
+
 struct RowOpsBase {
-  RowOpsBase(DataType type, EncodingType encoding) {
+  RowOpsBase(DataType type, EncodingType encoding) : type_(type), 
encoding_(encoding) {
     schema_ = Schema({ColumnSchema("key", INT32),
                      ColumnSchema("val_a", type, true, nullptr, nullptr,
                          ColumnStorageAttributes(encoding, 
DEFAULT_COMPRESSION)),
@@ -18,6 +42,8 @@ struct RowOpsBase {
                          ColumnStorageAttributes(encoding, 
DEFAULT_COMPRESSION))}, 1);
 
   }
+  DataType type_;
+  EncodingType encoding_;
   Schema schema_;
   Schema altered_schema_;
 };
@@ -25,34 +51,48 @@ struct RowOpsBase {
 template<typename KeyTypeWrapper>
 struct SliceTypeRowOps : public RowOpsBase {
   SliceTypeRowOps() : RowOpsBase(KeyTypeWrapper::type, 
KeyTypeWrapper::encoding),
-    strs_(16), slices_(16), cur(0) {}
+    strs_(kNumAllocatedElements), slices_(kNumAllocatedElements), cur(0) {}
 
   // Assumes the string representation of n is under strlen characters.
-  std::string LeftZeroPadded(const int& n, const int& strlen) {
+  std::string LeftZeroPadded(int n, int strlen) {
     return StringPrintf(Substitute("%0$0$1", strlen, PRId64).c_str(), 
static_cast<int64_t>(n));
   }
 
-  void GenerateRow(const int& value, KuduPartialRow* row) {
+  void GenerateRow(int value, bool altered, KuduPartialRow* row) {
     if (value < 0) {
-      CHECK_OK(row->SetNull(1));
-      CHECK_OK(row->SetNull(2));
+      CHECK_OK(row->SetNull(kColA));
+      CHECK_OK(row->SetNull(kColB));
+      if (altered) {
+        CHECK_OK(row->SetNull(kColC));
+      }
     } else {
       std::string string_a = LeftZeroPadded(value, 10);
       std::string string_b = LeftZeroPadded(value, 10);
+      std::string string_c = LeftZeroPadded(value, 10);
       Slice slice_a(string_a);
       Slice slice_b(string_b);
-      CHECK_OK(row->SetSliceCopy<TypeTraits<KeyTypeWrapper::type>>(1, 
slice_a));
-      CHECK_OK(row->SetSliceCopy<TypeTraits<KeyTypeWrapper::type>>(2, 
slice_b));
+      Slice slice_c(string_c);
+      CHECK_OK(row->SetSliceCopy<TypeTraits<KeyTypeWrapper::type>>(kColA, 
slice_a));
+      CHECK_OK(row->SetSliceCopy<TypeTraits<KeyTypeWrapper::type>>(kColB, 
slice_b));
+      if (altered) {
+        CHECK_OK(row->SetSliceCopy<TypeTraits<KeyTypeWrapper::type>>(kColC, 
slice_c));
+      }
     }
   }
 
-  ColumnPredicate GenerateRangePredicate(const size_t& col, const int& lower, 
const int& upper) {
+  void* GenerateElement(int value) {
+    strs_[cur] = LeftZeroPadded(value, kStrlen);
+    slices_[cur] = Slice(strs_[cur]);
+    return &slices_[cur++];
+  }
+
+  ColumnPredicate GenerateRangePredicate(const Schema& schema, size_t col, int 
lower, int upper) {
     // Key predicate strings and slices in scope in a vector.
     strs_[cur] = LeftZeroPadded(lower, 10);
-    strs_[cur+1] = LeftZeroPadded(upper, 10);
+    strs_[cur + 1] = LeftZeroPadded(upper, 10);
     slices_[cur] = Slice(strs_[cur]);
-    slices_[cur+1] = Slice(strs_[cur+1]);
-    auto pred = ColumnPredicate::Range(schema_.column(col), &slices_[cur], 
&slices_[cur+1]);
+    slices_[cur + 1] = Slice(strs_[cur + 1]);
+    auto pred = ColumnPredicate::Range(schema.column(col), &slices_[cur], 
&slices_[cur + 1]);
     cur += 2;
     return pred;
   }
@@ -67,24 +107,35 @@ struct SliceTypeRowOps : public RowOpsBase {
 template<typename KeyTypeWrapper>
 struct NumTypeRowOps : public RowOpsBase {
   NumTypeRowOps() : RowOpsBase(KeyTypeWrapper::type, KeyTypeWrapper::encoding),
-    nums_(16), cur(0) {}
+    nums_(kNumAllocatedElements), cur(0) {}
 
   typedef typename TypeTraits<KeyTypeWrapper::type>::cpp_type CppType;
 
-  void GenerateRow(const int& value, KuduPartialRow* row) {
+  void GenerateRow(int value, bool altered, KuduPartialRow* row) {
     if (value < 0) {
-      CHECK_OK(row->SetNull(1));
-      CHECK_OK(row->SetNull(2));
+      CHECK_OK(row->SetNull(kColA));
+      CHECK_OK(row->SetNull(kColB));
+      if (altered) {
+        CHECK_OK(row->SetNull(kColC));
+      }
     } else {
-      row->Set<TypeTraits<KeyTypeWrapper::type>>(1, value);
-      row->Set<TypeTraits<KeyTypeWrapper::type>>(2, value);
+      row->Set<TypeTraits<KeyTypeWrapper::type>>(kColA, value);
+      row->Set<TypeTraits<KeyTypeWrapper::type>>(kColB, value);
+      if (altered) {
+        row->Set<TypeTraits<KeyTypeWrapper::type>>(kColC, value);
+      }
     }
   }
 
-  ColumnPredicate GenerateRangePredicate(const size_t& col , const int& lower, 
const int& upper) {
+  void* GenerateElement(int value) {
+    nums_[cur] = value;
+    return &nums_[cur++];
+  }
+
+  ColumnPredicate GenerateRangePredicate(const Schema& schema, size_t col, int 
lower, int upper) {
     nums_[cur] = lower;
-    nums_[cur+1] = upper;
-    auto pred = ColumnPredicate::Range(schema_.column(col), &nums_[cur], 
&nums_[cur+1]);
+    nums_[cur + 1] = upper;
+    auto pred = ColumnPredicate::Range(schema.column(col), &nums_[cur], 
&nums_[cur + 1]);
     cur += 2;
     return pred;
   }
@@ -97,8 +148,7 @@ struct NumTypeRowOps : public RowOpsBase {
 
 // Calculates the number of values in the range [lower, upper) given a 
repeated, completely
 // non-null pattern with the specified cardinality and the specified number of 
rows.
-int ExpectedCount(const int& nrows, const int& cardinality, const int& 
lower_val,
-    const int& upper_val) {
+int ExpectedCount(int nrows, int cardinality, int lower_val, int upper_val) {
   if (lower_val >= upper_val || lower_val >= cardinality) {
     return 0;
   }
@@ -123,10 +173,10 @@ int ExpectedCount(const int& nrows, const int& 
cardinality, const int& lower_val
   return (nrows / cardinality) * (upper - lower) + last_chunk_count;
 }
 
-void ScopedTraceMsg(const std::string& test_name, const int& expected, const 
int& actual) {
+std::string TraceMsg(const std::string& test_name,  int expected, int actual) {
   std::ostringstream ss;
   ss << test_name << " Scan Results: " << expected << " expected, " << actual 
<< " returned.";
-  SCOPED_TRACE(ss.str());
+  return ss.str();
 }
 
 // Tests for correctness by running predicates on repeated patterns of rows, 
specified by nrows,
@@ -136,7 +186,8 @@ template <class RowOps>
 class AllTypesScanCorrectnessTest : public KuduTabletTest {
 public:
   AllTypesScanCorrectnessTest() : KuduTabletTest(RowOps().schema_), 
rowops_(RowOps()),
-      schema_(rowops_.schema_), base_nrows_(0), base_cardinality_(0), 
base_null_upper_(0) {}
+      schema_(rowops_.schema_), altered_schema_(schema_),
+      base_nrows_(0), base_cardinality_(0), base_null_upper_(0), 
added_nrows_(0) {}
 
   void SetUp() override {
     KuduTabletTest::SetUp();
@@ -150,7 +201,6 @@ public:
     base_nrows_ = nrows;
     base_cardinality_ = cardinality;
     base_null_upper_ = null_upper;
-    RowBuilder rb(client_schema_);
     LocalTabletWriter writer(tablet().get(), &client_schema_);
     KuduPartialRow row(&client_schema_);
     for (int i = 0; i < nrows; i++) {
@@ -159,46 +209,88 @@ public:
       // Populate the bottom of the repeating pattern with NULLs.
       // Note: Non-positive null_upper will yield a completely non-NULL column.
       if (i % cardinality < null_upper) {
-        rowops_.GenerateRow(-1, &row);
+        rowops_.GenerateRow(-1, false, &row);
       } else {
-        rowops_.GenerateRow(i % cardinality, &row);
+        rowops_.GenerateRow(i % cardinality, false, &row);
       }
       ASSERT_OK_FAST(writer.Insert(row));
     }
     ASSERT_OK(tablet()->Flush());
   }
 
+  // Adds the above pattern to the table with keys starting after the base 
rows.
+  void FillAlteredTestTablet(int nrows) {
+    added_nrows_ = nrows;
+    LocalTabletWriter writer(tablet().get(), &altered_schema_);
+    KuduPartialRow row(&altered_schema_);
+    for (int i = 0; i < nrows; i++) {
+      CHECK_OK(row.SetInt32(0, base_nrows_ + i));
+      if (i % base_cardinality_ < base_null_upper_) {
+        rowops_.GenerateRow(-1, true, &row);
+      } else {
+        rowops_.GenerateRow(i % base_cardinality_, true, &row);
+      }
+      ASSERT_OK_FAST(writer.Upsert(row));
+    }
+    ASSERT_OK(tablet()->Flush());
+  }
+
+  // Adds a column called "val_c" to the schema with the specified 
read-default.
+  void AddColumn(int read_default) {
+    void* default_ptr;
+    if (read_default < 0) {
+      default_ptr = nullptr;
+    } else {
+      default_ptr = rowops_.GenerateElement(read_default);
+    }
+    SchemaBuilder builder(tablet()->metadata()->schema());
+    builder.RemoveColumn("val_c");
+    ASSERT_OK(builder.AddColumn("val_c", rowops_.type_, true, default_ptr, 
nullptr));
+    AlterSchema(builder.Build());
+    altered_schema_ = Schema({ColumnSchema("key", INT32),
+                     ColumnSchema("val_a", rowops_.type_, true, nullptr, 
nullptr,
+                         ColumnStorageAttributes(rowops_.encoding_, 
DEFAULT_COMPRESSION)),
+                     ColumnSchema("val_b", rowops_.type_, true, nullptr, 
nullptr,
+                         ColumnStorageAttributes(rowops_.encoding_, 
DEFAULT_COMPRESSION)),
+                     ColumnSchema("val_c", rowops_.type_, true, default_ptr, 
nullptr,
+                         ColumnStorageAttributes(rowops_.encoding_, 
DEFAULT_COMPRESSION))}, 1);
+  }
+
   // Scan the results of a query. Set "count" to the number of results 
satisfying the predicates.
   // ScanSpec must have all desired predicates already added to it.
-  void ScanWithSpec(ScanSpec spec, int* count) {
+  void ScanWithSpec(const Schema& schema, ScanSpec spec, int* count) {
     Arena arena(1028, 1028);
     AutoReleasePool pool;
     *count = 0;
-    spec.OptimizeScan(client_schema_, &arena, &pool, true);
+    spec.OptimizeScan(schema, &arena, &pool, true);
     gscoped_ptr<RowwiseIterator> iter;
-    ASSERT_OK(tablet()->NewRowIterator(client_schema_, &iter));
+    ASSERT_OK(tablet()->NewRowIterator(schema, &iter));
     ASSERT_OK(iter->Init(&spec));
     ASSERT_TRUE(spec.predicates().empty()) << "Should have accepted all 
predicate.";
     ASSERT_OK(SilentIterateToStringList(iter.get(), count));
   }
 
   void RunTests() {
-    int lower = 3;
-    int upper = 10;
-    int lower_non_null = 3;
-    int zero = 0;
-    if (lower < base_null_upper_) {
+    RunUnalteredTabletTests();
+    RunAlteredTabletTests();
+  }
+
+  // Runs queries on an un-altered table. Correctness is determined by 
comparing the number of rows
+  // returned with the number of rows expected by each query.
+  void RunUnalteredTabletTests() {
+    int lower_non_null = kLower;
+    if (kLower < base_null_upper_) {
       lower_non_null = base_null_upper_;
     }
 
     int count = 0;
     {
       ScanSpec spec;
-      auto pred = rowops_.GenerateRangePredicate(1, lower, upper);
+      auto pred = rowops_.GenerateRangePredicate(schema_, kColA, kLower, 
kUpper);
       spec.AddPredicate(pred);
-      ScanWithSpec(spec, &count);
-      int expected_count = ExpectedCount(base_nrows_, base_cardinality_, 
lower_non_null, upper);
-      ScopedTraceMsg("Range", expected_count, count);
+      ScanWithSpec(schema_, spec, &count);
+      int expected_count = ExpectedCount(base_nrows_, base_cardinality_, 
lower_non_null, kUpper);
+      SCOPED_TRACE(TraceMsg("Range", expected_count, count));
       ASSERT_EQ(expected_count, count);
     }
     {
@@ -209,32 +301,210 @@ public:
       // Since the two columns have identical data, the result will be:
       //   AND:   [lower_val, upper_val)
       ScanSpec spec;
-      ColumnPredicate pred_a = rowops_.GenerateRangePredicate(1, zero, upper);
+      ColumnPredicate pred_a = rowops_.GenerateRangePredicate(schema_, kColA, 
0, kUpper);
       spec.AddPredicate(pred_a);
-      ColumnPredicate pred_b = rowops_.GenerateRangePredicate(2, lower, 
base_cardinality_);
+      ColumnPredicate pred_b = rowops_.GenerateRangePredicate(schema_, kColB, 
kLower,
+                                                              
base_cardinality_);
       spec.AddPredicate(pred_b);
-      ScanWithSpec(spec, &count);
-      int expected_count = ExpectedCount(base_nrows_, base_cardinality_, 
lower_non_null, upper);
-      ScopedTraceMsg("MultiColumn Range", expected_count, count);
+      ScanWithSpec(schema_, spec, &count);
+      int expected_count = ExpectedCount(base_nrows_, base_cardinality_, 
lower_non_null, kUpper);
+      SCOPED_TRACE(TraceMsg("MultiColumn Range", expected_count, count));
       ASSERT_EQ(expected_count, count);
     }
     {
       ScanSpec spec;
-      auto pred = ColumnPredicate::IsNotNull(schema_.column(2));
+      auto pred = ColumnPredicate::IsNotNull(schema_.column(kColB));
       spec.AddPredicate(pred);
-      ScanWithSpec(spec, &count);
+      ScanWithSpec(schema_, spec, &count);
       int expected_count = ExpectedCount(base_nrows_, base_cardinality_, 
base_null_upper_,
           base_cardinality_);
-      ScopedTraceMsg("IsNotNull", expected_count, count);
+      SCOPED_TRACE(TraceMsg("IsNotNull", expected_count, count));
       ASSERT_EQ(expected_count, count);
     }
     {
       ScanSpec spec;
-      auto pred = ColumnPredicate::IsNull(schema_.column(2));
+      auto pred = ColumnPredicate::IsNull(schema_.column(kColB));
       spec.AddPredicate(pred);
-      ScanWithSpec(spec, &count);
-      int expected_count = ExpectedCount(base_nrows_, base_cardinality_, zero, 
base_null_upper_);
-      ScopedTraceMsg("IsNull", expected_count, count);
+      ScanWithSpec(schema_, spec, &count);
+      int expected_count = ExpectedCount(base_nrows_, base_cardinality_, 0, 
base_null_upper_);
+      SCOPED_TRACE(TraceMsg("IsNull", expected_count, count));
+      ASSERT_EQ(expected_count, count);
+    }
+  }
+
+  // Runs tests with an altered table. Queries are run with different 
read-defaults and are deemed
+  // correct if they return the correct number of results.
+  void RunAlteredTabletTests() {
+    int lower_non_null = kLower;
+    // Determine the lowest non-null value in the data range. Used in getting 
expected counts.
+    if (kLower < base_null_upper_) {
+      lower_non_null = base_null_upper_;
+    }
+    // Tests with null read-default. Ex. case: (-: null, 1: pred satisfied, 0: 
pred not satisfied)
+    // kLower: 5, kUpper: 8
+    // Base nrows: 30                 |Altered nrows: 25
+    // Cardinality: 20, null_upper: 3, read_default: NULL
+    // Predicate: (val_b >= 0 && val_b < 8) && (val_c >= 5 && val_c < 20)
+    //  0    5    10   15   20   25   |30   35   40   45   50     key
+    // [---11111000000000000---1111100|---11111000000000000---11] val_b
+    // [------------------------------|---00111111111111111---00] val_c
+    // [000000000000000000000000000000|0000011100000000000000000] Result
+    AddColumn(-1);
+    FillAlteredTestTablet(kNumAddedRows);
+    int count = 0;
+    {
+      ScanSpec spec;
+      // val_b >= kLower && val_b <= kUpper && val_c is null
+      auto pred_b = rowops_.GenerateRangePredicate(altered_schema_, kColB, 
kLower, kUpper);
+      auto pred_c = ColumnPredicate::IsNull(altered_schema_.column(kColC));
+      spec.AddPredicate(pred_b);
+      spec.AddPredicate(pred_c);
+      ScanWithSpec(altered_schema_, spec, &count);
+      int base_expected_count = ExpectedCount(base_nrows_,
+                                              base_cardinality_,
+                                              lower_non_null,
+                                              kUpper);
+      // Since the new column has the same data as the base columns, IsNull 
with a Range predicate
+      // should yield no rows from the added rows.
+      int altered_expected_count = 0;
+      int expected_count = base_expected_count + altered_expected_count;
+      SCOPED_TRACE(TraceMsg("Null default, Range+IsNull", expected_count, 
count));
+      ASSERT_EQ(expected_count, count);
+    }
+    {
+      ScanSpec spec;
+      // val_b >= kLower && val_b <= kUpper && val_c is not null
+      auto pred_b = rowops_.GenerateRangePredicate(altered_schema_, kColB, 
kLower, kUpper);
+      auto pred_c = ColumnPredicate::IsNotNull(altered_schema_.column(kColC));
+      spec.AddPredicate(pred_b);
+      spec.AddPredicate(pred_c);
+      ScanWithSpec(altered_schema_, spec, &count);
+      // Since the table has a null read-default on the added column, the 
IsNotNull predicate
+      // should filter out all rows in the base data.
+      int base_expected_count = 0;
+      int altered_expected_count = ExpectedCount(added_nrows_,
+                                                 base_cardinality_,
+                                                 lower_non_null,
+                                                 kUpper);
+      int expected_count = base_expected_count + altered_expected_count;
+      SCOPED_TRACE(TraceMsg("Null default, Range+IsNotNull", expected_count, 
count));
+      ASSERT_EQ(expected_count, count);
+    }
+    {
+      ScanSpec spec;
+      // val_b >= 0 && val_b < kUpper && val_c >= kLower && val_c < cardinality
+      auto pred_b = rowops_.GenerateRangePredicate(altered_schema_, kColB, 0, 
kUpper);
+      auto pred_c = rowops_.GenerateRangePredicate(altered_schema_, kColC, 
kLower,
+                                                   base_cardinality_);
+      spec.AddPredicate(pred_b);
+      spec.AddPredicate(pred_c);
+      ScanWithSpec(altered_schema_, spec, &count);
+      // Since the added column has a null read-default, the base rows will be 
completely filtered.
+      int base_expected_count = 0;
+      // The added data will be predicated with [lower, upper).
+      int altered_expected_count = ExpectedCount(added_nrows_,
+                                                 base_cardinality_,
+                                                 lower_non_null,
+                                                 kUpper);
+      int expected_count = base_expected_count + altered_expected_count;
+      SCOPED_TRACE(TraceMsg("Null default, Range", expected_count, count));
+      ASSERT_EQ(expected_count, count);
+    }
+    // Tests with non-null read-default. Ex. case:
+    // kLower: 5, kUpper: 8
+    // Base nrows: 30                |Altered nrows: 25
+    // Cardinality: 20, null_upper: 3, read_default: 7
+    // Predicate: (val_b >= 0 && val_b < 8) && (val_c >= 5 && val_c < 20)
+    //  0    5    10   15   20   25   |30   35   40   45   50     key
+    // [---11111000000000000---1111100|---11111000000000000---11] val_b
+    // [111111111111111111111111111111|---00111111111111111---00] val_c
+    // [000111110000000000000001111100|0000011100000000000000000] Result
+    int read_default = 7;
+    AddColumn(read_default);
+    FillAlteredTestTablet(kNumAddedRows);
+    {
+      ScanSpec spec;
+      // val_b >= kLower && val_b <= kUpper && val_c is null
+      auto pred_b = rowops_.GenerateRangePredicate(altered_schema_, kColB, 
kLower, kUpper);
+      auto pred_c = ColumnPredicate::IsNull(altered_schema_.column(kColC));
+      spec.AddPredicate(pred_b);
+      spec.AddPredicate(pred_c);
+      ScanWithSpec(altered_schema_, spec, &count);
+      // The base data is not null, since there is a read-default.
+      int base_expected_count = 0;
+      // Since the new column has the same data as the base columns, IsNull 
with a Range predicate
+      // should yield no rows from the added rows.
+      int altered_expected_count = 0;
+      int expected_count = base_expected_count + altered_expected_count;
+      SCOPED_TRACE(TraceMsg("Non-null default, Range+IsNull", expected_count, 
count));
+      ASSERT_EQ(expected_count, count);
+    }
+    {
+      ScanSpec spec;
+      // val_b >= kLower && val_b <= kUpper && val_c is not null
+      auto pred_b = rowops_.GenerateRangePredicate(altered_schema_, kColB, 
kLower, kUpper);
+      auto pred_c = ColumnPredicate::IsNotNull(altered_schema_.column(kColC));
+      spec.AddPredicate(pred_b);
+      spec.AddPredicate(pred_c);
+      ScanWithSpec(altered_schema_, spec, &count);
+      int base_expected_count = ExpectedCount(base_nrows_, base_cardinality_, 
lower_non_null,
+                                              kUpper);
+      // Since the new column has the same data as the base columns, IsNotNull 
with a Range
+      // predicate should yield the same rows as the Range query alone on the 
altered data.
+      int altered_expected_count = ExpectedCount(added_nrows_, 
base_cardinality_, lower_non_null,
+                                                 kUpper);
+      int expected_count = base_expected_count + altered_expected_count;
+      SCOPED_TRACE(TraceMsg("Non-null default, Range+IsNotNull", 
expected_count, count));
+      ASSERT_EQ(expected_count, count);
+    }
+    {
+      int lower = 0;
+      ScanSpec spec;
+      // val_b >= 0 && val_b < kUpper && val_c >= kLower && val_c < cardinality
+      auto pred_b = rowops_.GenerateRangePredicate(altered_schema_, kColB, 0, 
kUpper);
+      auto pred_c = rowops_.GenerateRangePredicate(altered_schema_, kColC, 
kLower,
+                                                   base_cardinality_);
+      spec.AddPredicate(pred_b);
+      spec.AddPredicate(pred_c);
+      ScanWithSpec(altered_schema_, spec, &count);
+      // Take into account possible null values when calculating expected 
counts.
+      if (base_null_upper_ > 0) {
+        lower = base_null_upper_;
+      }
+      // Because the read_default is in range, the predicate on "val_c" will 
be satisfied
+      // by base data, and all rows that satisfy "pred_b" will be returned.
+      int base_expected_count = ExpectedCount(base_nrows_, base_cardinality_, 
lower, kUpper);
+      int altered_expected_count = ExpectedCount(added_nrows_,
+                                                 base_cardinality_,
+                                                 lower_non_null,
+                                                 kUpper);
+      int expected_count = base_expected_count + altered_expected_count;
+      SCOPED_TRACE(TraceMsg("Non-null default, Range with Default", 
expected_count, count));
+      ASSERT_EQ(expected_count, count);
+    }
+    {
+      // Used to ensure the query does not select the read-default.
+      int default_plus_one = read_default + 1;
+      ScanSpec spec;
+      // val_b >= 0 && val_b < kUpper && val_c >= read_default + 1 && val_c < 
cardinality
+      auto pred_b = rowops_.GenerateRangePredicate(altered_schema_, kColB, 0, 
kUpper);
+      auto pred_c = rowops_.GenerateRangePredicate(altered_schema_, kColC, 
default_plus_one,
+                                                   base_cardinality_);
+      spec.AddPredicate(pred_b);
+      spec.AddPredicate(pred_c);
+      ScanWithSpec(altered_schema_, spec, &count);
+      if (default_plus_one < base_null_upper_) {
+        default_plus_one = base_null_upper_;
+      }
+      // Because the read_default is out of range, the "pred_c" will not be 
satisfied by base data,
+      // so all base rows will be filtered.
+      int base_expected_count = 0;
+      int altered_expected_count = ExpectedCount(added_nrows_,
+                                                 base_cardinality_,
+                                                 default_plus_one,
+                                                 kUpper);
+      int expected_count = base_expected_count + altered_expected_count;
+      SCOPED_TRACE(TraceMsg("Non-null default, Range without Default", 
expected_count, count));
       ASSERT_EQ(expected_count, count);
     }
   }
@@ -242,10 +512,11 @@ public:
 protected:
   RowOps rowops_;
   Schema schema_;
+  Schema altered_schema_;
   int base_nrows_;
   int base_cardinality_;
   int base_null_upper_;
-  bool has_been_altered_;
+  int added_nrows_;
 };
 
 template<DataType KeyType, EncodingType Encoding>
@@ -280,17 +551,20 @@ typedef 
::testing::Types<NumTypeRowOps<KeyTypeWrapper<INT8, BIT_SHUFFLE>>,
 TYPED_TEST_CASE(AllTypesScanCorrectnessTest, KeyTypes);
 
 TYPED_TEST(AllTypesScanCorrectnessTest, AllNonNull) {
-  this->FillTestTablet(10000, 21, 0);
+  int null_upper = 0;
+  this->FillTestTablet(kNumBaseRows, kCardinality, null_upper);
   this->RunTests();
 }
 
 TYPED_TEST(AllTypesScanCorrectnessTest, SomeNull) {
-  this->FillTestTablet(10000, 21, 5);
+  int null_upper = kUpper/2;
+  this->FillTestTablet(kNumBaseRows, kCardinality, null_upper);
   this->RunTests();
 }
 
 TYPED_TEST(AllTypesScanCorrectnessTest, AllNull) {
-  this->FillTestTablet(10000, 21, 21);
+  int null_upper = kCardinality;
+  this->FillTestTablet(kNumBaseRows, kCardinality, null_upper);
   this->RunTests();
 }
 

Reply via email to