This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 5b00edde01 [GLUTEN-7179][CH] Fix infinite loop with parquet column
index reader (#7185)
5b00edde01 is described below
commit 5b00edde0190297e0504631493824568e04df795
Author: Wenzheng Liu <[email protected]>
AuthorDate: Wed Sep 11 10:25:47 2024 +0800
[GLUTEN-7179][CH] Fix infinite loop with parquet column index reader (#7185)
---
.../Storages/Parquet/ColumnIndexFilter.cpp | 4 +-
.../tests/gtest_parquet_columnindex.cpp | 48 ++++++++++++++++++++++
2 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp
b/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp
index 1063cba8a0..52eb7a4f0d 100644
--- a/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp
+++ b/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp
@@ -393,7 +393,7 @@ struct DESCENDING : BoundaryOrder
{
if (lowerLeft > lowerRight)
return std::nullopt;
- Int32 i = std::floor((lowerLeft + lowerRight) / 2);
+ Int32 i = floorMid(lowerLeft, lowerRight);
if (comparator.compareValueToMax(i) > 0)
lowerRight = upperRight = i - 1;
else if (comparator.compareValueToMin(i) < 0)
@@ -406,7 +406,7 @@ struct DESCENDING : BoundaryOrder
{
if (upperLeft > upperRight)
return std::nullopt;
- Int32 i = std::ceil((upperLeft + upperRight) / 2);
+ Int32 i = ceilingMid(upperLeft, upperRight);
if (comparator.compareValueToMax(i) > 0)
upperRight = i - 1;
else if (comparator.compareValueToMin(i) < 0)
diff --git a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
index e42a2a89a3..48ea16a4bc 100644
--- a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
+++ b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
@@ -42,6 +42,24 @@
#include <Common/BlockTypeUtils.h>
#include <Common/QueryContext.h>
+
+# define ASSERT_DURATION_LE(secs, stmt) \
+ { \
+ std::promise<bool> completed; \
+ auto stmt_future = completed.get_future(); \
+ std::thread( \
+ [&](std::promise<bool> & completed) \
+ { \
+ stmt; \
+ completed.set_value(true); \
+ }, \
+ std::ref(completed)) \
+ .detach(); \
+ if (stmt_future.wait_for(std::chrono::seconds(secs)) ==
std::future_status::timeout) \
+ GTEST_FATAL_FAILURE_(" timed out (> " #secs " seconds).
Check code for infinite loops"); \
+ }
+
+
namespace DB::ErrorCodes
{
extern const int LOGICAL_ERROR;
@@ -163,6 +181,13 @@ public:
return *this;
}
+ CIBuilder & addSamePages(size_t num, int64_t nullCount, const std::string
& min, const std::string & max)
+ {
+ for (size_t i = 0; i < num; ++i)
+ addPage(nullCount, min, max);
+ return *this;
+ }
+
parquet::ColumnIndexPtr build() const
{
const parquet::ColumnDescriptor descr(node_,
/*max_definition_level=*/1, 0);
@@ -188,6 +213,13 @@ public:
return *this;
}
+ OIBuilder & addSamePages(size_t num, size_t row_count)
+ {
+ for (size_t i = 0; i < num; ++i)
+ addPage(row_count);
+ return *this;
+ }
+
parquet::OffsetIndexPtr build() const
{
parquet::OffsetIndexBuilderPtr builder =
parquet::OffsetIndexBuilder::Make();
@@ -298,6 +330,13 @@ static const CIBuilder c5 =
CIBuilder(PNB::optional(parquet::Type::INT64).named(
static const OIBuilder o5 = OIBuilder().addPage(1).addPage(29);
static const parquet::ColumnDescriptor d5 = c5.descr();
+// GLUTEN-7179 - test customer.c_mktsegment = 'BUILDING'
+static const CIBuilder c6 =
CIBuilder(PNB::optional(parquet::Type::BYTE_ARRAY).as(parquet::ConvertedType::UTF8).named("c_mktsegment"))
+ .addSamePages(75, 0, "AUTOMOBILE", "MACHINERY")
+ .addPage(0, "AUTOMOBILE", "FURNITURE");
+static const OIBuilder o6 = OIBuilder().addSamePages(77, 10);
+static const parquet::ColumnDescriptor d6 = c6.descr();
+
local_engine::ColumnIndexStore buildTestColumnIndexStore()
{
local_engine::ColumnIndexStore result;
@@ -306,6 +345,7 @@ local_engine::ColumnIndexStore buildTestColumnIndexStore()
result[d3.name()] = std::move(local_engine::ColumnIndex::create(&d3,
c3.build(), o3.build()));
result[d4.name()] = std::move(local_engine::ColumnIndex::create(&d4,
nullptr, o3.build()));
result[d5.name()] = std::move(local_engine::ColumnIndex::create(&d5,
c5.build(), o5.build()));
+ result[d6.name()] = std::move(local_engine::ColumnIndex::create(&d6,
c6.build(), o6.build()));
return result;
}
@@ -317,6 +357,7 @@ AnotherRowType buildTestRowType()
result.emplace_back(toAnotherFieldType(d3));
result.emplace_back(toAnotherFieldType(d4));
result.emplace_back(toAnotherFieldType(d5));
+ result.emplace_back(toAnotherFieldType(d6));
return result;
}
@@ -469,6 +510,13 @@ TEST(ColumnIndex, FilteringWithAllNullPages)
// testCondition("column5 == 1234567", TOTALSIZE);
// testCondition("column5 >= 1234567", TOTALSIZE);
}
+
+TEST(ColumnIndex, GLUTEN_7179_INFINTE_LOOP)
+{
+ using namespace test_utils;
+ ASSERT_DURATION_LE(10, { testCondition("c_mktsegment = 'BUILDING'", 760);
})
+}
+
TEST(ColumnIndex, FilteringWithNotFoundColumnName)
{
using namespace test_utils;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]