This is an automated email from the ASF dual-hosted git repository.
liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 6bedcfa28a [GLUTEN-8073][CH] Replace some deprecated methods about
sort (#8079)
6bedcfa28a is described below
commit 6bedcfa28af4a16c82e79d06d5d08c09a109fcb0
Author: lgbo <[email protected]>
AuthorDate: Thu Nov 28 16:44:26 2024 +0800
[GLUTEN-8073][CH] Replace some deprecated methods about sort (#8079)
What changes were proposed in this pull request?
(Please fill in changes proposed in this fix)
Fixes: #8073
How was this patch tested?
(Please explain how this patch was tested. E.g. unit tests, integration
tests, manual tests)
unit tests
(If this patch involves UI changes, please attach a screenshot; otherwise,
remove this)
---
.../AggregateFunctions/GroupLimitFunctions.cpp | 4 +-
cpp-ch/local-engine/Operator/BranchStep.h | 4 +-
.../Parser/RelParsers/GroupLimitRelParser.cpp | 15 ++-----
.../Parser/RelParsers/GroupLimitRelParser.h | 5 +--
.../RelParsers/SortParsingUtils.cpp} | 4 +-
.../RelParsers/SortParsingUtils.h} | 0
.../Parser/RelParsers/SortRelParser.cpp | 43 +-------------------
.../local-engine/Parser/RelParsers/SortRelParser.h | 2 -
.../Parser/RelParsers/WindowRelParser.cpp | 47 ++--------------------
.../Parser/RelParsers/WindowRelParser.h | 1 -
cpp-ch/local-engine/tests/gtest_ch_join.cpp | 4 +-
11 files changed, 18 insertions(+), 111 deletions(-)
diff --git a/cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp
b/cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp
index 137ae8a544..a9be46908b 100644
--- a/cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp
+++ b/cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#include <new>
#include <vector>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
@@ -41,7 +40,6 @@
#include <Poco/Logger.h>
#include <Common/logger_useful.h>
-#include "base/defines.h"
namespace DB::ErrorCodes
{
@@ -182,7 +180,7 @@ class RowNumGroupArraySorted final : public
DB::IAggregateFunctionDataHelper<Row
public:
explicit RowNumGroupArraySorted(DB::DataTypePtr data_type, const DB::Array
& parameters_)
: DB::IAggregateFunctionDataHelper<RowNumGroupArraySortedData,
RowNumGroupArraySorted>(
- {data_type}, parameters_, getRowNumReultDataType(data_type))
+ {data_type}, parameters_, getRowNumReultDataType(data_type))
{
if (parameters_.size() != 2)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "{} needs two
parameters: limit and order clause", getName());
diff --git a/cpp-ch/local-engine/Operator/BranchStep.h
b/cpp-ch/local-engine/Operator/BranchStep.h
index ddbd4c6fbb..fd2203aae8 100644
--- a/cpp-ch/local-engine/Operator/BranchStep.h
+++ b/cpp-ch/local-engine/Operator/BranchStep.h
@@ -21,10 +21,10 @@
#include <Interpreters/Context_fwd.h>
#include <Processors/Chunk.h>
#include <Processors/IProcessor.h>
+#include <Processors/Port.h>
#include <Processors/QueryPlan/IQueryPlanStep.h>
#include <Processors/QueryPlan/ITransformingStep.h>
-#include "Processors/Port.h"
-#include "Processors/QueryPlan/QueryPlan.h"
+#include <Processors/QueryPlan/QueryPlan.h>
namespace local_engine
{
diff --git a/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.cpp
b/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.cpp
index 06f68e8ae2..e39878cae8 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.cpp
@@ -17,7 +17,6 @@
#include "GroupLimitRelParser.h"
#include <memory>
-#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <Columns/IColumn.h>
@@ -34,6 +33,7 @@
#include <Operator/GraceMergingAggregatedStep.h>
#include <Operator/WindowGroupLimitStep.h>
#include <Parser/AdvancedParametersParseUtil.h>
+#include <Parser/RelParsers/SortParsingUtils.h>
#include <Parser/RelParsers/SortRelParser.h>
#include <Processors/IProcessor.h>
#include <Processors/Port.h>
@@ -54,7 +54,6 @@
#include <Common/CHUtil.h>
#include <Common/GlutenConfig.h>
#include <Common/QueryContext.h>
-#include <Common/SortUtils.h>
#include <Common/logger_useful.h>
namespace DB::ErrorCodes
@@ -414,15 +413,9 @@ void
AggregateGroupLimitRelParser::postProjectionForExplodingArrays(DB::QueryPla
void AggregateGroupLimitRelParser::addSortStep(DB::QueryPlan & plan)
{
auto header = plan.getCurrentHeader();
- DB::SortDescription full_sort_descr;
- auto partition_fields =
parsePartitionFields(win_rel_def->partition_expressions());
- for (auto field : partition_fields)
- {
- const auto & col = header.getByPosition(field);
- full_sort_descr.emplace_back(col.name, 1, -1);
- }
- auto sort_desrc =
SortRelParser::parseSortDescription(win_rel_def->sorts(), header);
- full_sort_descr.insert(full_sort_descr.end(), sort_desrc.begin(),
sort_desrc.end());
+ auto full_sort_descr = parseSortFields(header,
win_rel_def->partition_expressions());
+ auto sort_descr = parseSortFields(header, win_rel_def->sorts());
+ full_sort_descr.insert(full_sort_descr.end(), sort_descr.begin(),
sort_descr.end());
DB::SortingStep::Settings settings(*getContext());
auto config = MemoryConfig::loadFromContext(getContext());
diff --git a/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.h
b/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.h
index b9f3aa6631..6f570ba179 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.h
+++ b/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.h
@@ -18,10 +18,7 @@
#include <optional>
#include <Parser/RelParsers/RelParser.h>
#include <Processors/QueryPlan/QueryPlan.h>
-#include <Poco/Logger.h>
-#include <Common/logger_useful.h>
-#include "Analyzer/IQueryTreeNode.h"
-#include "substrait/algebra.pb.h"
+#include <substrait/algebra.pb.h>
namespace local_engine
{
diff --git a/cpp-ch/local-engine/Common/SortUtils.cpp
b/cpp-ch/local-engine/Parser/RelParsers/SortParsingUtils.cpp
similarity index 99%
rename from cpp-ch/local-engine/Common/SortUtils.cpp
rename to cpp-ch/local-engine/Parser/RelParsers/SortParsingUtils.cpp
index 1b18cc4bfa..70cacf0633 100644
--- a/cpp-ch/local-engine/Common/SortUtils.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/SortParsingUtils.cpp
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#include "SortUtils.h"
+#include "SortParsingUtils.h"
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <Poco/Logger.h>
@@ -37,7 +37,7 @@ DB::SortDescription parseSortFields(const DB::Block & header,
const google::prot
{
auto pos =
expr.selection().direct_reference().struct_field().field();
const auto & col_name = header.getByPosition(pos).name;
- description.push_back(DB::SortColumnDescription(col_name, 1, 1));
+ description.push_back(DB::SortColumnDescription(col_name, 1, -1));
}
else if (expr.has_literal())
continue;
diff --git a/cpp-ch/local-engine/Common/SortUtils.h
b/cpp-ch/local-engine/Parser/RelParsers/SortParsingUtils.h
similarity index 100%
rename from cpp-ch/local-engine/Common/SortUtils.h
rename to cpp-ch/local-engine/Parser/RelParsers/SortParsingUtils.h
diff --git a/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.cpp
b/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.cpp
index 1ed4f2565d..a8023a9818 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.cpp
@@ -16,6 +16,7 @@
*/
#include "SortRelParser.h"
+#include <Parser/RelParsers/SortParsingUtils.h>
#include <Parser/RelParsers/RelParser.h>
#include <Processors/QueryPlan/SortingStep.h>
#include <Common/GlutenConfig.h>
@@ -41,7 +42,7 @@ SortRelParser::parse(DB::QueryPlanPtr query_plan, const
substrait::Rel & rel, st
{
size_t limit = parseLimit(rel_stack_);
const auto & sort_rel = rel.sort();
- auto sort_descr = parseSortDescription(sort_rel.sorts(),
query_plan->getCurrentHeader());
+ auto sort_descr = parseSortFields(query_plan->getCurrentHeader(),
sort_rel.sorts());
SortingStep::Settings settings(*getContext());
auto config = MemoryConfig::loadFromContext(getContext());
double spill_mem_ratio = config.spill_mem_ratio;
@@ -53,46 +54,6 @@ SortRelParser::parse(DB::QueryPlanPtr query_plan, const
substrait::Rel & rel, st
return query_plan;
}
-DB::SortDescription
-SortRelParser::parseSortDescription(const
google::protobuf::RepeatedPtrField<substrait::SortField> & sort_fields, const
DB::Block & header)
-{
- static std::map<int, std::pair<int, int>> direction_map = {{1, {1, -1}},
{2, {1, 1}}, {3, {-1, 1}}, {4, {-1, -1}}};
-
- DB::SortDescription sort_descr;
- for (int i = 0, sz = sort_fields.size(); i < sz; ++i)
- {
- const auto & sort_field = sort_fields[i];
- /// There is no meaning to sort a const column.
- if (sort_field.expr().has_literal())
- continue;
-
- if (!sort_field.expr().has_selection() ||
!sort_field.expr().selection().has_direct_reference()
- ||
!sort_field.expr().selection().direct_reference().has_struct_field())
- {
- throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unsupport sort
field");
- }
- auto field_pos =
sort_field.expr().selection().direct_reference().struct_field().field();
-
- auto direction_iter = direction_map.find(sort_field.direction());
- if (direction_iter == direction_map.end())
- {
- throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unsuppor sort
direction: {}", sort_field.direction());
- }
- if (header.columns())
- {
- const auto & col_name = header.getByPosition(field_pos).name;
- sort_descr.emplace_back(col_name, direction_iter->second.first,
direction_iter->second.second);
- sort_descr.back().column_name = col_name;
- }
- else
- {
- const auto & col_name = header.getByPosition(field_pos).name;
- sort_descr.emplace_back(col_name, direction_iter->second.first,
direction_iter->second.second);
- }
- }
- return sort_descr;
-}
-
size_t SortRelParser::parseLimit(std::list<const substrait::Rel *> &
rel_stack_)
{
if (rel_stack_.empty())
diff --git a/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.h
b/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.h
index 7e6119095c..27cd4497d1 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.h
+++ b/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.h
@@ -30,8 +30,6 @@ public:
DB::QueryPlanPtr
parse(DB::QueryPlanPtr query_plan, const substrait::Rel & sort_rel,
std::list<const substrait::Rel *> & rel_stack_) override;
- static DB::SortDescription
- parseSortDescription(const
google::protobuf::RepeatedPtrField<substrait::SortField> & sort_fields, const
DB::Block & header);
std::optional<const substrait::Rel *> getSingleInput(const substrait::Rel
& rel) override { return &rel.sort().input(); }
diff --git a/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.cpp
b/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.cpp
index d52f2543c8..d3ed22cbb8 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.cpp
@@ -29,6 +29,7 @@
#include <IO/WriteBufferFromString.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/WindowDescription.h>
+#include <Parser/RelParsers/SortParsingUtils.h>
#include <Parser/RelParsers/RelParser.h>
#include <Parser/RelParsers/SortRelParser.h>
#include <Parser/TypeParser.h>
@@ -97,8 +98,8 @@ DB::WindowDescription
WindowRelParser::parseWindowDescription(const WindowInfo &
{
DB::WindowDescription win_descr;
win_descr.frame = parseWindowFrame(win_info);
- win_descr.partition_by = parsePartitionBy(win_info.partition_exprs);
- win_descr.order_by =
SortRelParser::parseSortDescription(win_info.sort_fields,
current_plan->getCurrentHeader());
+ win_descr.partition_by = parseSortFields(current_plan->getCurrentHeader(),
win_info.partition_exprs);
+ win_descr.order_by = parseSortFields(current_plan->getCurrentHeader(),
win_info.sort_fields);
win_descr.full_sort_description = win_descr.partition_by;
win_descr.full_sort_description.insert(win_descr.full_sort_description.end(),
win_descr.order_by.begin(), win_descr.order_by.end());
@@ -177,17 +178,11 @@ WindowRelParser::parseWindowFrameType(const std::string &
function_name, const s
frame_type = window_function.window_type();
if (frame_type == substrait::ROWS)
- {
return DB::WindowFrame::FrameType::ROWS;
- }
else if (frame_type == substrait::RANGE)
- {
return DB::WindowFrame::FrameType::RANGE;
- }
else
- {
throw DB::Exception(DB::ErrorCodes::UNKNOWN_TYPE, "Unknow window frame
type:{}", frame_type);
- }
}
void WindowRelParser::parseBoundType(
@@ -206,13 +201,9 @@ void WindowRelParser::parseBoundType(
bound_type = DB::WindowFrame::BoundaryType::Offset;
preceding_direction = preceding.offset() >= 0;
if (preceding.offset() < 0)
- {
offset = 0 - preceding.offset();
- }
else
- {
offset = preceding.offset();
- }
}
else if (bound.has_following())
{
@@ -220,13 +211,9 @@ void WindowRelParser::parseBoundType(
bound_type = DB::WindowFrame::BoundaryType::Offset;
preceding_direction = following.offset() < 0;
if (following.offset() < 0)
- {
offset = 0 - following.offset();
- }
else
- {
offset = following.offset();
- }
}
else if (bound.has_current_row())
{
@@ -252,31 +239,6 @@ void WindowRelParser::parseBoundType(
}
}
-DB::SortDescription WindowRelParser::parsePartitionBy(const
google::protobuf::RepeatedPtrField<substrait::Expression> & expressions)
-{
- DB::Block header = current_plan->getCurrentHeader();
- DB::SortDescription sort_descr;
- for (const auto & expr : expressions)
- {
- if (expr.has_selection())
- {
- auto pos =
expr.selection().direct_reference().struct_field().field();
- auto col_name = header.getByPosition(pos).name;
- sort_descr.push_back(DB::SortColumnDescription(col_name, 1, 1));
- }
- else if (expr.has_literal())
- {
- // literal is a special case, see in #2586
- continue;
- }
- else
- {
- throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unknow partition
argument type: {}", expr.DebugString());
- }
- }
- return sort_descr;
-}
-
WindowFunctionDescription WindowRelParser::parseWindowFunctionDescription(
const String & ch_function_name,
const substrait::Expression::WindowFunction & window_function,
@@ -370,8 +332,7 @@ void WindowRelParser::tryAddProjectionAfterWindow()
{
ActionsDAG convert_action = ActionsDAG::makeConvertingActions(
current_header.getColumnsWithTypeAndName(),
output_header.getColumnsWithTypeAndName(),
DB::ActionsDAG::MatchColumnsMode::Name);
- QueryPlanStepPtr convert_step
- =
std::make_unique<DB::ExpressionStep>(current_plan->getCurrentHeader(),
std::move(convert_action));
+ QueryPlanStepPtr convert_step =
std::make_unique<DB::ExpressionStep>(current_plan->getCurrentHeader(),
std::move(convert_action));
convert_step->setStepDescription("Convert window Output");
steps.emplace_back(convert_step.get());
current_plan->addStep(std::move(convert_step));
diff --git a/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.h
b/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.h
index da058af857..fccdb1c6f4 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.h
+++ b/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.h
@@ -78,7 +78,6 @@ private:
DB::WindowFrame::BoundaryType & bound_type,
Field & offset,
bool & preceding);
- DB::SortDescription parsePartitionBy(const
google::protobuf::RepeatedPtrField<substrait::Expression> & expressions);
DB::WindowFunctionDescription parseWindowFunctionDescription(
const String & ch_function_name,
const substrait::Expression::WindowFunction & window_function,
diff --git a/cpp-ch/local-engine/tests/gtest_ch_join.cpp
b/cpp-ch/local-engine/tests/gtest_ch_join.cpp
index 52120cede0..02d4312474 100644
--- a/cpp-ch/local-engine/tests/gtest_ch_join.cpp
+++ b/cpp-ch/local-engine/tests/gtest_ch_join.cpp
@@ -123,7 +123,7 @@ TEST(TestJoin, simple)
auto hash_join = std::make_shared<HashJoin>(join,
right_plan.getCurrentHeader());
QueryPlanStepPtr join_step
- = std::make_unique<JoinStep>(left_plan.getCurrentHeader(),
right_plan.getCurrentHeader(), hash_join, 8192, 1, false);
+ = std::make_unique<JoinStep>(left_plan.getCurrentHeader(),
right_plan.getCurrentHeader(), hash_join, 8192, 8192, 1, false);
std::cerr << "join step:" << join_step->getOutputHeader().dumpStructure()
<< std::endl;
@@ -145,4 +145,4 @@ TEST(TestJoin, simple)
auto res = pipeline->getHeader().cloneEmpty();
executor.pull(res);
debug::headBlock(res);
-}
\ No newline at end of file
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]