lgbo-ustc commented on code in PR #6723: URL: https://github.com/apache/incubator-gluten/pull/6723#discussion_r1746568873
########## cpp-ch/local-engine/Shuffle/SparkExchangeSink.cpp: ########## @@ -0,0 +1,389 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "SparkExchangeSink.h" + +#include <Processors/Transforms/AggregatingTransform.h> +#include <Shuffle/PartitionWriter.h> +#include <jni/jni_common.h> +#include <jni/CelebornClient.h> +#include <Poco/StringTokenizer.h> +#include <Processors/Sinks/NullSink.h> +#include <QueryPipeline/QueryPipelineBuilder.h> +#include <boost/algorithm/string/case_conv.hpp> +#include <Storages/IO/AggregateSerializationUtils.h> + + +namespace DB +{ +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} +} + +using namespace DB; + +namespace local_engine +{ +void SparkExchangeSink::consume(Chunk chunk) +{ + Stopwatch wall_time; + if (chunk.getNumRows() == 0) + return; + split_result.total_blocks += 1; + split_result.total_rows += chunk.getNumRows(); + auto aggregate_info = chunk.getChunkInfos().get<AggregatedChunkInfo>(); + auto input = inputs.front().getHeader().cloneWithColumns(chunk.detachColumns()); + Stopwatch split_time_watch; + if (!sort_writer) + input = convertAggregateStateInBlock(input); + split_result.total_split_time += split_time_watch.elapsedNanoseconds(); + + Stopwatch 
compute_pid_time_watch; + PartitionInfo partition_info = partitioner->build(input); + split_result.total_compute_pid_time += compute_pid_time_watch.elapsedNanoseconds(); + + Block out_block; + for (size_t col_i = 0; col_i < output_header.columns(); ++col_i) + { + out_block.insert(input.getByPosition(output_columns_indicies[col_i])); + } + if (aggregate_info) + { + out_block.info.is_overflows = aggregate_info->is_overflows; + out_block.info.bucket_num = aggregate_info->bucket_num; + } + partition_writer->write(partition_info, out_block); + split_result.wall_time += wall_time.elapsedNanoseconds(); +} + +void SparkExchangeSink::onFinish() +{ + Stopwatch wall_time; + if (!dynamic_cast<LocalPartitionWriter*>(partition_writer.get())) + { + partition_writer->evictPartitions(); + } + split_result.wall_time += wall_time.elapsedNanoseconds(); +} + +void SparkExchangeSink::initOutputHeader(const Block & block) +{ + if (!output_header) + { + if (output_columns_indicies.empty()) + { + output_header = block.cloneEmpty(); + for (size_t i = 0; i < block.columns(); ++i) + output_columns_indicies.push_back(i); + } + else + { + ColumnsWithTypeAndName cols; + for (const auto & index : output_columns_indicies) + cols.push_back(block.getByPosition(index)); + + output_header = Block(std::move(cols)); + } + } +} + +SparkExchangeManager::SparkExchangeManager(const Block& header, const String & short_name, const SplitOptions & options_, jobject rss_pusher): input_header(materializeBlock(header)), options(options_) +{ + if (rss_pusher) + { + GET_JNIENV(env) + jclass celeborn_partition_pusher_class = + CreateGlobalClassReference(env, "Lorg/apache/spark/shuffle/CelebornPartitionPusher;"); + jmethodID celeborn_push_partition_data_method = + GetMethodID(env, celeborn_partition_pusher_class, "pushPartitionData", "(I[BI)I"); + CLEAN_JNIENV + celeborn_client = std::make_unique<CelebornClient>(rss_pusher, celeborn_push_partition_data_method); + use_rss = true; + } + if 
(!partitioner_creators.contains(short_name)) + { + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "unsupported splitter {}", short_name); + } + partitioner_creator = partitioner_creators[short_name]; + Poco::StringTokenizer output_column_tokenizer(options_.out_exprs, ","); + for (const auto & iter : output_column_tokenizer) + output_columns_indicies.push_back(std::stoi(iter)); + auto overhead_memory = header.columns() * 16 * options.split_size * options.partition_num; + use_sort_shuffle = overhead_memory > options.spill_threshold * 0.5 || options.partition_num >= 300 || options.force_memory_sort; + + split_result.partition_lengths.resize(options.partition_num, 0); + split_result.raw_partition_lengths.resize(options.partition_num, 0); +} + +std::shared_ptr<PartitionWriter> createPartitionWriter(const SplitOptions& options, bool use_sort_shuffle, std::unique_ptr<CelebornClient> celeborn_client) +{ + if (celeborn_client) + { + if (use_sort_shuffle) + return std::make_shared<MemorySortCelebornPartitionWriter>(options, std::move(celeborn_client)); + return std::make_shared<CelebornPartitionWriter>(options, std::move(celeborn_client)); + } + if (use_sort_shuffle) + return std::make_shared<MemorySortLocalPartitionWriter>(options); + return std::make_shared<LocalPartitionWriter>(options); +} + +void SparkExchangeManager::initSinks(size_t num) +{ + if (num > 1 && celeborn_client) + { + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "CelebornClient can't be used with multiple sinks"); + } + sinks.resize(num); + partition_writers.resize(num); + for (size_t i = 0; i < num; ++i) + { + partition_writers[i] = createPartitionWriter(options, use_sort_shuffle, std::move(celeborn_client)); + sinks[i] = std::make_shared<SparkExchangeSink>(input_header, partitioner_creator(options), partition_writers[i], output_columns_indicies, use_sort_shuffle); + } +} + +void SparkExchangeManager::setSinksToPipeline(DB::QueryPipelineBuilder & pipeline) const +{ + size_t count = 0; + 
Pipe::ProcessorGetterWithStreamKind getter = [&](const Block & header, Pipe::StreamType stream_type) -> ProcessorPtr Review Comment: lost namespace `DB`? ########## cpp-ch/local-engine/Shuffle/SparkExchangeSink.cpp: ########## @@ -0,0 +1,389 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "SparkExchangeSink.h" + +#include <Processors/Transforms/AggregatingTransform.h> +#include <Shuffle/PartitionWriter.h> +#include <jni/jni_common.h> +#include <jni/CelebornClient.h> +#include <Poco/StringTokenizer.h> +#include <Processors/Sinks/NullSink.h> +#include <QueryPipeline/QueryPipelineBuilder.h> +#include <boost/algorithm/string/case_conv.hpp> +#include <Storages/IO/AggregateSerializationUtils.h> + + +namespace DB +{ +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} +} + +using namespace DB; + +namespace local_engine +{ +void SparkExchangeSink::consume(Chunk chunk) +{ + Stopwatch wall_time; + if (chunk.getNumRows() == 0) + return; + split_result.total_blocks += 1; + split_result.total_rows += chunk.getNumRows(); + auto aggregate_info = chunk.getChunkInfos().get<AggregatedChunkInfo>(); + auto input = inputs.front().getHeader().cloneWithColumns(chunk.detachColumns()); + Stopwatch split_time_watch; + if (!sort_writer) + input = convertAggregateStateInBlock(input); + split_result.total_split_time += split_time_watch.elapsedNanoseconds(); + + Stopwatch compute_pid_time_watch; + PartitionInfo partition_info = partitioner->build(input); + split_result.total_compute_pid_time += compute_pid_time_watch.elapsedNanoseconds(); + + Block out_block; + for (size_t col_i = 0; col_i < output_header.columns(); ++col_i) + { + out_block.insert(input.getByPosition(output_columns_indicies[col_i])); + } + if (aggregate_info) + { + out_block.info.is_overflows = aggregate_info->is_overflows; + out_block.info.bucket_num = aggregate_info->bucket_num; + } + partition_writer->write(partition_info, out_block); + split_result.wall_time += wall_time.elapsedNanoseconds(); +} + +void SparkExchangeSink::onFinish() +{ + Stopwatch wall_time; + if (!dynamic_cast<LocalPartitionWriter*>(partition_writer.get())) + { + partition_writer->evictPartitions(); + } + split_result.wall_time += wall_time.elapsedNanoseconds(); +} + +void SparkExchangeSink::initOutputHeader(const 
Block & block) +{ + if (!output_header) + { + if (output_columns_indicies.empty()) + { + output_header = block.cloneEmpty(); + for (size_t i = 0; i < block.columns(); ++i) + output_columns_indicies.push_back(i); + } + else + { + ColumnsWithTypeAndName cols; + for (const auto & index : output_columns_indicies) + cols.push_back(block.getByPosition(index)); + + output_header = Block(std::move(cols)); + } + } +} + +SparkExchangeManager::SparkExchangeManager(const Block& header, const String & short_name, const SplitOptions & options_, jobject rss_pusher): input_header(materializeBlock(header)), options(options_) +{ + if (rss_pusher) + { + GET_JNIENV(env) + jclass celeborn_partition_pusher_class = + CreateGlobalClassReference(env, "Lorg/apache/spark/shuffle/CelebornPartitionPusher;"); + jmethodID celeborn_push_partition_data_method = + GetMethodID(env, celeborn_partition_pusher_class, "pushPartitionData", "(I[BI)I"); + CLEAN_JNIENV + celeborn_client = std::make_unique<CelebornClient>(rss_pusher, celeborn_push_partition_data_method); + use_rss = true; + } + if (!partitioner_creators.contains(short_name)) + { + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "unsupported splitter {}", short_name); + } + partitioner_creator = partitioner_creators[short_name]; + Poco::StringTokenizer output_column_tokenizer(options_.out_exprs, ","); + for (const auto & iter : output_column_tokenizer) + output_columns_indicies.push_back(std::stoi(iter)); + auto overhead_memory = header.columns() * 16 * options.split_size * options.partition_num; + use_sort_shuffle = overhead_memory > options.spill_threshold * 0.5 || options.partition_num >= 300 || options.force_memory_sort; + + split_result.partition_lengths.resize(options.partition_num, 0); + split_result.raw_partition_lengths.resize(options.partition_num, 0); +} + +std::shared_ptr<PartitionWriter> createPartitionWriter(const SplitOptions& options, bool use_sort_shuffle, std::unique_ptr<CelebornClient> celeborn_client) +{ + if 
(celeborn_client) + { + if (use_sort_shuffle) + return std::make_shared<MemorySortCelebornPartitionWriter>(options, std::move(celeborn_client)); Review Comment: I wonder whether a celeborn client could be shared among multiple threads. If it's sharable, making it a shared_ptr may be better. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
