This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 5b00edde01 [GLUTEN-7179][CH] Fix infinite loop with parquet column 
index reader (#7185)
5b00edde01 is described below

commit 5b00edde0190297e0504631493824568e04df795
Author: Wenzheng Liu <[email protected]>
AuthorDate: Wed Sep 11 10:25:47 2024 +0800

    [GLUTEN-7179][CH] Fix infinite loop with parquet column index reader (#7185)
---
 .../Storages/Parquet/ColumnIndexFilter.cpp         |  4 +-
 .../tests/gtest_parquet_columnindex.cpp            | 48 ++++++++++++++++++++++
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp 
b/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp
index 1063cba8a0..52eb7a4f0d 100644
--- a/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp
+++ b/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp
@@ -393,7 +393,7 @@ struct DESCENDING : BoundaryOrder
         {
             if (lowerLeft > lowerRight)
                 return std::nullopt;
-            Int32 i = std::floor((lowerLeft + lowerRight) / 2);
+            Int32 i = floorMid(lowerLeft, lowerRight);
             if (comparator.compareValueToMax(i) > 0)
                 lowerRight = upperRight = i - 1;
             else if (comparator.compareValueToMin(i) < 0)
@@ -406,7 +406,7 @@ struct DESCENDING : BoundaryOrder
         {
             if (upperLeft > upperRight)
                 return std::nullopt;
-            Int32 i = std::ceil((upperLeft + upperRight) / 2);
+            Int32 i = ceilingMid(upperLeft, upperRight);
             if (comparator.compareValueToMax(i) > 0)
                 upperRight = i - 1;
             else if (comparator.compareValueToMin(i) < 0)
diff --git a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp 
b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
index e42a2a89a3..48ea16a4bc 100644
--- a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
+++ b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
@@ -42,6 +42,24 @@
 #include <Common/BlockTypeUtils.h>
 #include <Common/QueryContext.h>
 
+
+#    define ASSERT_DURATION_LE(secs, stmt) \
+        { \
+            std::promise<bool> completed; \
+            auto stmt_future = completed.get_future(); \
+            std::thread( \
+                [&](std::promise<bool> & completed) \
+                { \
+                    stmt; \
+                    completed.set_value(true); \
+                }, \
+                std::ref(completed)) \
+                .detach(); \
+            if (stmt_future.wait_for(std::chrono::seconds(secs)) == 
std::future_status::timeout) \
+                GTEST_FATAL_FAILURE_("       timed out (> " #secs " seconds). 
Check code for infinite loops"); \
+        }
+
+
 namespace DB::ErrorCodes
 {
 extern const int LOGICAL_ERROR;
@@ -163,6 +181,13 @@ public:
         return *this;
     }
 
+    CIBuilder & addSamePages(size_t num, int64_t nullCount, const std::string 
& min, const std::string & max)
+    {
+        for (size_t i = 0; i < num; ++i)
+            addPage(nullCount, min, max);
+        return *this;
+    }
+
     parquet::ColumnIndexPtr build() const
     {
         const parquet::ColumnDescriptor descr(node_, 
/*max_definition_level=*/1, 0);
@@ -188,6 +213,13 @@ public:
         return *this;
     }
 
+    OIBuilder & addSamePages(size_t num, size_t row_count)
+    {
+        for (size_t i = 0; i < num; ++i)
+            addPage(row_count);
+        return *this;
+    }
+
     parquet::OffsetIndexPtr build() const
     {
         parquet::OffsetIndexBuilderPtr builder = 
parquet::OffsetIndexBuilder::Make();
@@ -298,6 +330,13 @@ static const CIBuilder c5 = 
CIBuilder(PNB::optional(parquet::Type::INT64).named(
 static const OIBuilder o5 = OIBuilder().addPage(1).addPage(29);
 static const parquet::ColumnDescriptor d5 = c5.descr();
 
+// GLUTEN-7179 - test customer.c_mktsegment = 'BUILDING'
+static const CIBuilder c6 = 
CIBuilder(PNB::optional(parquet::Type::BYTE_ARRAY).as(parquet::ConvertedType::UTF8).named("c_mktsegment"))
+                                .addSamePages(75, 0, "AUTOMOBILE", "MACHINERY")
+                                .addPage(0, "AUTOMOBILE", "FURNITURE");
+static const OIBuilder o6 = OIBuilder().addSamePages(77, 10);
+static const parquet::ColumnDescriptor d6 = c6.descr();
+
 local_engine::ColumnIndexStore buildTestColumnIndexStore()
 {
     local_engine::ColumnIndexStore result;
@@ -306,6 +345,7 @@ local_engine::ColumnIndexStore buildTestColumnIndexStore()
     result[d3.name()] = std::move(local_engine::ColumnIndex::create(&d3, 
c3.build(), o3.build()));
     result[d4.name()] = std::move(local_engine::ColumnIndex::create(&d4, 
nullptr, o3.build()));
     result[d5.name()] = std::move(local_engine::ColumnIndex::create(&d5, 
c5.build(), o5.build()));
+    result[d6.name()] = std::move(local_engine::ColumnIndex::create(&d6, 
c6.build(), o6.build()));
     return result;
 }
 
@@ -317,6 +357,7 @@ AnotherRowType buildTestRowType()
     result.emplace_back(toAnotherFieldType(d3));
     result.emplace_back(toAnotherFieldType(d4));
     result.emplace_back(toAnotherFieldType(d5));
+    result.emplace_back(toAnotherFieldType(d6));
     return result;
 }
 
@@ -469,6 +510,13 @@ TEST(ColumnIndex, FilteringWithAllNullPages)
     // testCondition("column5 == 1234567", TOTALSIZE);
     // testCondition("column5 >= 1234567", TOTALSIZE);
 }
+
+TEST(ColumnIndex, GLUTEN_7179_INFINTE_LOOP)
+{
+    using namespace test_utils;
+    ASSERT_DURATION_LE(10, { testCondition("c_mktsegment = 'BUILDING'", 760); 
})
+}
+
 TEST(ColumnIndex, FilteringWithNotFoundColumnName)
 {
     using namespace test_utils;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to