This is an automated email from the ASF dual-hosted git repository.

liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 6bedcfa28a [GLUTEN-8073][CH] Replace some deprecated methods about 
sort (#8079)
6bedcfa28a is described below

commit 6bedcfa28af4a16c82e79d06d5d08c09a109fcb0
Author: lgbo <[email protected]>
AuthorDate: Thu Nov 28 16:44:26 2024 +0800

    [GLUTEN-8073][CH] Replace some deprecated methods about sort (#8079)
    
    What changes were proposed in this pull request?
    (Please fill in changes proposed in this fix)
    
    Fixes: #8073
    
    How was this patch tested?
    (Please explain how this patch was tested. E.g. unit tests, integration 
tests, manual tests)
    
    unit tests
    
    (If this patch involves UI changes, please attach a screenshot; otherwise, 
remove this)
---
 .../AggregateFunctions/GroupLimitFunctions.cpp     |  4 +-
 cpp-ch/local-engine/Operator/BranchStep.h          |  4 +-
 .../Parser/RelParsers/GroupLimitRelParser.cpp      | 15 ++-----
 .../Parser/RelParsers/GroupLimitRelParser.h        |  5 +--
 .../RelParsers/SortParsingUtils.cpp}               |  4 +-
 .../RelParsers/SortParsingUtils.h}                 |  0
 .../Parser/RelParsers/SortRelParser.cpp            | 43 +-------------------
 .../local-engine/Parser/RelParsers/SortRelParser.h |  2 -
 .../Parser/RelParsers/WindowRelParser.cpp          | 47 ++--------------------
 .../Parser/RelParsers/WindowRelParser.h            |  1 -
 cpp-ch/local-engine/tests/gtest_ch_join.cpp        |  4 +-
 11 files changed, 18 insertions(+), 111 deletions(-)

diff --git a/cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp 
b/cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp
index 137ae8a544..a9be46908b 100644
--- a/cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp
+++ b/cpp-ch/local-engine/AggregateFunctions/GroupLimitFunctions.cpp
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <new>
 #include <vector>
 #include <AggregateFunctions/AggregateFunctionFactory.h>
 #include <AggregateFunctions/FactoryHelpers.h>
@@ -41,7 +40,6 @@
 
 #include <Poco/Logger.h>
 #include <Common/logger_useful.h>
-#include "base/defines.h"
 
 namespace DB::ErrorCodes
 {
@@ -182,7 +180,7 @@ class RowNumGroupArraySorted final : public 
DB::IAggregateFunctionDataHelper<Row
 public:
     explicit RowNumGroupArraySorted(DB::DataTypePtr data_type, const DB::Array 
& parameters_)
         : DB::IAggregateFunctionDataHelper<RowNumGroupArraySortedData, 
RowNumGroupArraySorted>(
-            {data_type}, parameters_, getRowNumReultDataType(data_type))
+              {data_type}, parameters_, getRowNumReultDataType(data_type))
     {
         if (parameters_.size() != 2)
             throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "{} needs two 
parameters: limit and order clause", getName());
diff --git a/cpp-ch/local-engine/Operator/BranchStep.h 
b/cpp-ch/local-engine/Operator/BranchStep.h
index ddbd4c6fbb..fd2203aae8 100644
--- a/cpp-ch/local-engine/Operator/BranchStep.h
+++ b/cpp-ch/local-engine/Operator/BranchStep.h
@@ -21,10 +21,10 @@
 #include <Interpreters/Context_fwd.h>
 #include <Processors/Chunk.h>
 #include <Processors/IProcessor.h>
+#include <Processors/Port.h>
 #include <Processors/QueryPlan/IQueryPlanStep.h>
 #include <Processors/QueryPlan/ITransformingStep.h>
-#include "Processors/Port.h"
-#include "Processors/QueryPlan/QueryPlan.h"
+#include <Processors/QueryPlan/QueryPlan.h>
 
 namespace local_engine
 {
diff --git a/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.cpp 
b/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.cpp
index 06f68e8ae2..e39878cae8 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.cpp
@@ -17,7 +17,6 @@
 
 #include "GroupLimitRelParser.h"
 #include <memory>
-#include <unordered_map>
 #include <unordered_set>
 #include <utility>
 #include <Columns/IColumn.h>
@@ -34,6 +33,7 @@
 #include <Operator/GraceMergingAggregatedStep.h>
 #include <Operator/WindowGroupLimitStep.h>
 #include <Parser/AdvancedParametersParseUtil.h>
+#include <Parser/RelParsers/SortParsingUtils.h>
 #include <Parser/RelParsers/SortRelParser.h>
 #include <Processors/IProcessor.h>
 #include <Processors/Port.h>
@@ -54,7 +54,6 @@
 #include <Common/CHUtil.h>
 #include <Common/GlutenConfig.h>
 #include <Common/QueryContext.h>
-#include <Common/SortUtils.h>
 #include <Common/logger_useful.h>
 
 namespace DB::ErrorCodes
@@ -414,15 +413,9 @@ void 
AggregateGroupLimitRelParser::postProjectionForExplodingArrays(DB::QueryPla
 void AggregateGroupLimitRelParser::addSortStep(DB::QueryPlan & plan)
 {
     auto header = plan.getCurrentHeader();
-    DB::SortDescription full_sort_descr;
-    auto partition_fields = 
parsePartitionFields(win_rel_def->partition_expressions());
-    for (auto field : partition_fields)
-    {
-        const auto & col = header.getByPosition(field);
-        full_sort_descr.emplace_back(col.name, 1, -1);
-    }
-    auto sort_desrc = 
SortRelParser::parseSortDescription(win_rel_def->sorts(), header);
-    full_sort_descr.insert(full_sort_descr.end(), sort_desrc.begin(), 
sort_desrc.end());
+    auto full_sort_descr = parseSortFields(header, 
win_rel_def->partition_expressions());
+    auto sort_descr = parseSortFields(header, win_rel_def->sorts());
+    full_sort_descr.insert(full_sort_descr.end(), sort_descr.begin(), 
sort_descr.end());
 
     DB::SortingStep::Settings settings(*getContext());
     auto config = MemoryConfig::loadFromContext(getContext());
diff --git a/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.h 
b/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.h
index b9f3aa6631..6f570ba179 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.h
+++ b/cpp-ch/local-engine/Parser/RelParsers/GroupLimitRelParser.h
@@ -18,10 +18,7 @@
 #include <optional>
 #include <Parser/RelParsers/RelParser.h>
 #include <Processors/QueryPlan/QueryPlan.h>
-#include <Poco/Logger.h>
-#include <Common/logger_useful.h>
-#include "Analyzer/IQueryTreeNode.h"
-#include "substrait/algebra.pb.h"
+#include <substrait/algebra.pb.h>
 
 namespace local_engine
 {
diff --git a/cpp-ch/local-engine/Common/SortUtils.cpp 
b/cpp-ch/local-engine/Parser/RelParsers/SortParsingUtils.cpp
similarity index 99%
rename from cpp-ch/local-engine/Common/SortUtils.cpp
rename to cpp-ch/local-engine/Parser/RelParsers/SortParsingUtils.cpp
index 1b18cc4bfa..70cacf0633 100644
--- a/cpp-ch/local-engine/Common/SortUtils.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/SortParsingUtils.cpp
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "SortUtils.h"
+#include "SortParsingUtils.h"
 #include <IO/Operators.h>
 #include <IO/WriteBufferFromString.h>
 #include <Poco/Logger.h>
@@ -37,7 +37,7 @@ DB::SortDescription parseSortFields(const DB::Block & header, 
const google::prot
         {
             auto pos = 
expr.selection().direct_reference().struct_field().field();
             const auto & col_name = header.getByPosition(pos).name;
-            description.push_back(DB::SortColumnDescription(col_name, 1, 1));
+            description.push_back(DB::SortColumnDescription(col_name, 1, -1));
         }
         else if (expr.has_literal())
             continue;
diff --git a/cpp-ch/local-engine/Common/SortUtils.h 
b/cpp-ch/local-engine/Parser/RelParsers/SortParsingUtils.h
similarity index 100%
rename from cpp-ch/local-engine/Common/SortUtils.h
rename to cpp-ch/local-engine/Parser/RelParsers/SortParsingUtils.h
diff --git a/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.cpp 
b/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.cpp
index 1ed4f2565d..a8023a9818 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.cpp
@@ -16,6 +16,7 @@
  */
 #include "SortRelParser.h"
 
+#include <Parser/RelParsers/SortParsingUtils.h>
 #include <Parser/RelParsers/RelParser.h>
 #include <Processors/QueryPlan/SortingStep.h>
 #include <Common/GlutenConfig.h>
@@ -41,7 +42,7 @@ SortRelParser::parse(DB::QueryPlanPtr query_plan, const 
substrait::Rel & rel, st
 {
     size_t limit = parseLimit(rel_stack_);
     const auto & sort_rel = rel.sort();
-    auto sort_descr = parseSortDescription(sort_rel.sorts(), 
query_plan->getCurrentHeader());
+    auto sort_descr = parseSortFields(query_plan->getCurrentHeader(), 
sort_rel.sorts());
     SortingStep::Settings settings(*getContext());
     auto config = MemoryConfig::loadFromContext(getContext());
     double spill_mem_ratio = config.spill_mem_ratio;
@@ -53,46 +54,6 @@ SortRelParser::parse(DB::QueryPlanPtr query_plan, const 
substrait::Rel & rel, st
     return query_plan;
 }
 
-DB::SortDescription
-SortRelParser::parseSortDescription(const 
google::protobuf::RepeatedPtrField<substrait::SortField> & sort_fields, const 
DB::Block & header)
-{
-    static std::map<int, std::pair<int, int>> direction_map = {{1, {1, -1}}, 
{2, {1, 1}}, {3, {-1, 1}}, {4, {-1, -1}}};
-
-    DB::SortDescription sort_descr;
-    for (int i = 0, sz = sort_fields.size(); i < sz; ++i)
-    {
-        const auto & sort_field = sort_fields[i];
-        /// There is no meaning to sort a const column.
-        if (sort_field.expr().has_literal())
-            continue;
-
-        if (!sort_field.expr().has_selection() || 
!sort_field.expr().selection().has_direct_reference()
-            || 
!sort_field.expr().selection().direct_reference().has_struct_field())
-        {
-            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unsupport sort 
field");
-        }
-        auto field_pos = 
sort_field.expr().selection().direct_reference().struct_field().field();
-
-        auto direction_iter = direction_map.find(sort_field.direction());
-        if (direction_iter == direction_map.end())
-        {
-            throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unsuppor sort 
direction: {}", sort_field.direction());
-        }
-        if (header.columns())
-        {
-            const auto & col_name = header.getByPosition(field_pos).name;
-            sort_descr.emplace_back(col_name, direction_iter->second.first, 
direction_iter->second.second);
-            sort_descr.back().column_name = col_name;
-        }
-        else
-        {
-            const auto & col_name = header.getByPosition(field_pos).name;
-            sort_descr.emplace_back(col_name, direction_iter->second.first, 
direction_iter->second.second);
-        }
-    }
-    return sort_descr;
-}
-
 size_t SortRelParser::parseLimit(std::list<const substrait::Rel *> & 
rel_stack_)
 {
     if (rel_stack_.empty())
diff --git a/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.h 
b/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.h
index 7e6119095c..27cd4497d1 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.h
+++ b/cpp-ch/local-engine/Parser/RelParsers/SortRelParser.h
@@ -30,8 +30,6 @@ public:
 
     DB::QueryPlanPtr
     parse(DB::QueryPlanPtr query_plan, const substrait::Rel & sort_rel, 
std::list<const substrait::Rel *> & rel_stack_) override;
-    static DB::SortDescription
-    parseSortDescription(const 
google::protobuf::RepeatedPtrField<substrait::SortField> & sort_fields, const 
DB::Block & header);
 
     std::optional<const substrait::Rel *> getSingleInput(const substrait::Rel 
& rel) override { return &rel.sort().input(); }
 
diff --git a/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.cpp 
b/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.cpp
index d52f2543c8..d3ed22cbb8 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.cpp
@@ -29,6 +29,7 @@
 #include <IO/WriteBufferFromString.h>
 #include <Interpreters/ActionsDAG.h>
 #include <Interpreters/WindowDescription.h>
+#include <Parser/RelParsers/SortParsingUtils.h>
 #include <Parser/RelParsers/RelParser.h>
 #include <Parser/RelParsers/SortRelParser.h>
 #include <Parser/TypeParser.h>
@@ -97,8 +98,8 @@ DB::WindowDescription 
WindowRelParser::parseWindowDescription(const WindowInfo &
 {
     DB::WindowDescription win_descr;
     win_descr.frame = parseWindowFrame(win_info);
-    win_descr.partition_by = parsePartitionBy(win_info.partition_exprs);
-    win_descr.order_by = 
SortRelParser::parseSortDescription(win_info.sort_fields, 
current_plan->getCurrentHeader());
+    win_descr.partition_by = parseSortFields(current_plan->getCurrentHeader(), 
win_info.partition_exprs);
+    win_descr.order_by = parseSortFields(current_plan->getCurrentHeader(), 
win_info.sort_fields);
     win_descr.full_sort_description = win_descr.partition_by;
     
win_descr.full_sort_description.insert(win_descr.full_sort_description.end(), 
win_descr.order_by.begin(), win_descr.order_by.end());
 
@@ -177,17 +178,11 @@ WindowRelParser::parseWindowFrameType(const std::string & 
function_name, const s
         frame_type = window_function.window_type();
 
     if (frame_type == substrait::ROWS)
-    {
         return DB::WindowFrame::FrameType::ROWS;
-    }
     else if (frame_type == substrait::RANGE)
-    {
         return DB::WindowFrame::FrameType::RANGE;
-    }
     else
-    {
         throw DB::Exception(DB::ErrorCodes::UNKNOWN_TYPE, "Unknow window frame 
type:{}", frame_type);
-    }
 }
 
 void WindowRelParser::parseBoundType(
@@ -206,13 +201,9 @@ void WindowRelParser::parseBoundType(
         bound_type = DB::WindowFrame::BoundaryType::Offset;
         preceding_direction = preceding.offset() >= 0;
         if (preceding.offset() < 0)
-        {
             offset = 0 - preceding.offset();
-        }
         else
-        {
             offset = preceding.offset();
-        }
     }
     else if (bound.has_following())
     {
@@ -220,13 +211,9 @@ void WindowRelParser::parseBoundType(
         bound_type = DB::WindowFrame::BoundaryType::Offset;
         preceding_direction = following.offset() < 0;
         if (following.offset() < 0)
-        {
             offset = 0 - following.offset();
-        }
         else
-        {
             offset = following.offset();
-        }
     }
     else if (bound.has_current_row())
     {
@@ -252,31 +239,6 @@ void WindowRelParser::parseBoundType(
     }
 }
 
-DB::SortDescription WindowRelParser::parsePartitionBy(const 
google::protobuf::RepeatedPtrField<substrait::Expression> & expressions)
-{
-    DB::Block header = current_plan->getCurrentHeader();
-    DB::SortDescription sort_descr;
-    for (const auto & expr : expressions)
-    {
-        if (expr.has_selection())
-        {
-            auto pos = 
expr.selection().direct_reference().struct_field().field();
-            auto col_name = header.getByPosition(pos).name;
-            sort_descr.push_back(DB::SortColumnDescription(col_name, 1, 1));
-        }
-        else if (expr.has_literal())
-        {
-            // literal is a special case, see in #2586
-            continue;
-        }
-        else
-        {
-            throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unknow partition 
argument type: {}", expr.DebugString());
-        }
-    }
-    return sort_descr;
-}
-
 WindowFunctionDescription WindowRelParser::parseWindowFunctionDescription(
     const String & ch_function_name,
     const substrait::Expression::WindowFunction & window_function,
@@ -370,8 +332,7 @@ void WindowRelParser::tryAddProjectionAfterWindow()
     {
         ActionsDAG convert_action = ActionsDAG::makeConvertingActions(
             current_header.getColumnsWithTypeAndName(), 
output_header.getColumnsWithTypeAndName(), 
DB::ActionsDAG::MatchColumnsMode::Name);
-        QueryPlanStepPtr convert_step
-            = 
std::make_unique<DB::ExpressionStep>(current_plan->getCurrentHeader(), 
std::move(convert_action));
+        QueryPlanStepPtr convert_step = 
std::make_unique<DB::ExpressionStep>(current_plan->getCurrentHeader(), 
std::move(convert_action));
         convert_step->setStepDescription("Convert window Output");
         steps.emplace_back(convert_step.get());
         current_plan->addStep(std::move(convert_step));
diff --git a/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.h 
b/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.h
index da058af857..fccdb1c6f4 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.h
+++ b/cpp-ch/local-engine/Parser/RelParsers/WindowRelParser.h
@@ -78,7 +78,6 @@ private:
         DB::WindowFrame::BoundaryType & bound_type,
         Field & offset,
         bool & preceding);
-    DB::SortDescription parsePartitionBy(const 
google::protobuf::RepeatedPtrField<substrait::Expression> & expressions);
     DB::WindowFunctionDescription parseWindowFunctionDescription(
         const String & ch_function_name,
         const substrait::Expression::WindowFunction & window_function,
diff --git a/cpp-ch/local-engine/tests/gtest_ch_join.cpp 
b/cpp-ch/local-engine/tests/gtest_ch_join.cpp
index 52120cede0..02d4312474 100644
--- a/cpp-ch/local-engine/tests/gtest_ch_join.cpp
+++ b/cpp-ch/local-engine/tests/gtest_ch_join.cpp
@@ -123,7 +123,7 @@ TEST(TestJoin, simple)
     auto hash_join = std::make_shared<HashJoin>(join, 
right_plan.getCurrentHeader());
 
     QueryPlanStepPtr join_step
-        = std::make_unique<JoinStep>(left_plan.getCurrentHeader(), 
right_plan.getCurrentHeader(), hash_join, 8192, 1, false);
+        = std::make_unique<JoinStep>(left_plan.getCurrentHeader(), 
right_plan.getCurrentHeader(), hash_join, 8192, 8192, 1, false);
 
     std::cerr << "join step:" << join_step->getOutputHeader().dumpStructure() 
<< std::endl;
 
@@ -145,4 +145,4 @@ TEST(TestJoin, simple)
     auto res = pipeline->getHeader().cloneEmpty();
     executor.pull(res);
     debug::headBlock(res);
-}
\ No newline at end of file
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to