This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 1bde51fa0 KUDU-3384: Handling IncrementEncodedKey failure
1bde51fa0 is described below

commit 1bde51fa0d5800ee76160258a8e879f7cce66a75
Author: zhangyifan27 <[email protected]>
AuthorDate: Wed Jul 20 14:08:28 2022 +0800

    KUDU-3384: Handling IncrementEncodedKey failure
    
    While using lower/upper of a DRS to optimize scan at DRS level, we
    should handle the cases where an upper bound key cannot be incremented.
    This patch fixes it, enables the reproduction scenario and also adds
    a regression test.
    
    Change-Id: I22c48f9893364c3fad5597431e4acfbcb2a4b43d
    Reviewed-on: http://gerrit.cloudera.org:8080/18761
    Tested-by: Alexey Serbin <[email protected]>
    Reviewed-by: Alexey Serbin <[email protected]>
---
 src/kudu/client/flex_partitioning_client-test.cc |  4 +-
 src/kudu/tablet/cfile_set-test.cc                | 55 +++++++++++++++++++++++-
 src/kudu/tablet/cfile_set.cc                     |  8 +++-
 3 files changed, 61 insertions(+), 6 deletions(-)

diff --git a/src/kudu/client/flex_partitioning_client-test.cc 
b/src/kudu/client/flex_partitioning_client-test.cc
index e23eb7272..c9d65ca72 100644
--- a/src/kudu/client/flex_partitioning_client-test.cc
+++ b/src/kudu/client/flex_partitioning_client-test.cc
@@ -1970,9 +1970,7 @@ class FlexPartitioningScanTest : public 
FlexPartitioningTest {
 };
 
 // This scenario is to reproduce the issue described in KUDU-3384.
-//
-// TODO(aserbin): enable the scenario once KUDU-3384 is fixed
-TEST_F(FlexPartitioningScanTest, DISABLED_MaxKeyValue) {
+TEST_F(FlexPartitioningScanTest, MaxKeyValue) {
   static constexpr const char* const kTableName = "max_key_value";
 
   unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
diff --git a/src/kudu/tablet/cfile_set-test.cc 
b/src/kudu/tablet/cfile_set-test.cc
index 6b115cbe9..0e6723d87 100644
--- a/src/kudu/tablet/cfile_set-test.cc
+++ b/src/kudu/tablet/cfile_set-test.cc
@@ -100,7 +100,7 @@ class TestCFileSet : public KuduRowSetTest {
     FLAGS_cfile_default_block_size = 512;
   }
 
-  // Write out a test rowset with two int columns.
+  // Write out a test rowset with three int columns.
   // The first column contains the row index * 2.
   // The second contains the row index * 10.
   // The third column contains index * 100, but is never read.
@@ -121,6 +121,30 @@ class TestCFileSet : public KuduRowSetTest {
     ASSERT_OK(rsw.Finish());
   }
 
+  void WriteTestRowSetWithMaxValue(int nrows) {
+    DiskRowSetWriter rsw(
+        rowset_meta_.get(), &schema_, BloomFilterSizing::BySizeAndFPRate(32 * 
1024, 0.01F));
+
+    ASSERT_OK(rsw.Open());
+
+    RowBuilder rb(&schema_);
+    int i = INT32_MAX - nrows + 1;
+    while (i != INT32_MAX) {
+      rb.Reset();
+      rb.AddInt32(i);
+      rb.AddInt32(i);
+      rb.AddInt32(i);
+      ASSERT_OK_FAST(WriteRow(rb.data(), &rsw));
+      i++;
+    }
+    rb.Reset();
+    rb.AddInt32(i);
+    rb.AddInt32(i);
+    rb.AddInt32(i);
+    ASSERT_OK_FAST(WriteRow(rb.data(), &rsw));
+    ASSERT_OK(rsw.Finish());
+  }
+
   // Int32 type add probe to the bloom filter.
   // bf1_contain: 0 2 4 6 8 ... (2n)th key for column 1 to form bloom filter.
   // bf1_exclude: 1 3 5 7 9 ... (2n + 1)th key for column 1 to form bloom 
filter.
@@ -628,6 +652,35 @@ TEST_F(TestCFileSet, TestInListPredicates) {
   DoTestInListScan(fileset, 1000, 10);
 }
 
+// Regression test for KUDU-3384
+TEST_F(TestCFileSet, TestKUDU3384) {
+  constexpr int kNumRows = 10;
+  WriteTestRowSetWithMaxValue(kNumRows);
+
+  shared_ptr<CFileSet> fileset;
+  ASSERT_OK(CFileSet::Open(
+      rowset_meta_, MemTracker::GetRootTracker(), 
MemTracker::GetRootTracker(), nullptr, &fileset));
+
+  // Create iterator.
+  unique_ptr<CFileSet::Iterator> cfile_iter(fileset->NewIterator(&schema_, 
nullptr));
+  unique_ptr<RowwiseIterator> 
iter(NewMaterializingIterator(std::move(cfile_iter)));
+
+  // Check a full scan is successful.
+  ScanSpec spec;
+  ASSERT_OK(iter->Init(&spec));
+  RowBlockMemory mem(1024);
+  RowBlock block(&schema_, 100, &mem);
+  int selected_size = 0;
+  while (iter->HasNext()) {
+    mem.Reset();
+    ASSERT_OK_FAST(iter->NextBlock(&block));
+    selected_size += block.selection_vector()->CountSelected();
+  }
+  ASSERT_EQ(kNumRows, selected_size);
+  // Check a range scan is successful.
+  DoTestRangeScan(fileset, INT32_MAX - kNumRows, INT32_MAX);
+}
+
 class InListPredicateBenchmark : public KuduRowSetTest {
  public:
   InListPredicateBenchmark()
diff --git a/src/kudu/tablet/cfile_set.cc b/src/kudu/tablet/cfile_set.cc
index e145edbae..7620eea51 100644
--- a/src/kudu/tablet/cfile_set.cc
+++ b/src/kudu/tablet/cfile_set.cc
@@ -442,8 +442,12 @@ Status CFileSet::Iterator::OptimizePKPredicates(ScanSpec* 
spec) {
 
   RETURN_NOT_OK(EncodedKey::DecodeEncodedString(
       tablet_schema, &arena_, base_data_->max_encoded_key_, &implicit_ub_key));
-  RETURN_NOT_OK(EncodedKey::IncrementEncodedKey(tablet_schema, 
&implicit_ub_key, &arena_));
-  if (!ub_key || ub_key->encoded_key() > implicit_ub_key->encoded_key()) {
+  Status s = EncodedKey::IncrementEncodedKey(tablet_schema, &implicit_ub_key, 
&arena_);
+  // Reset the exclusive_upper_bound_key only when we can get a valid and 
smaller upper bound key.
+  // In the case IncrementEncodedKey returns an error status because allocation 
fails or no
+  // lexicographically greater key exists, we fall back to scanning the rowset 
without optimizing the
+  // upper bound PK; we may scan more rows but we will still get the right 
result.
+  if (s.ok() && (!ub_key || ub_key->encoded_key() > 
implicit_ub_key->encoded_key())) {
     spec->SetExclusiveUpperBoundKey(implicit_ub_key);
     modify_upper_bound_key = true;
   }

Reply via email to