marin-ma commented on code in PR #5675:
URL: https://github.com/apache/incubator-gluten/pull/5675#discussion_r1602952627


##########
gluten-data/src/main/java/org/apache/gluten/vectorized/ShuffleWriterJniWrapper.java:
##########
@@ -161,8 +167,10 @@ public native long nativeMake(
       long taskAttemptId,
       int startPartitionId,
       int pushBufferMaxSize,
+      long sortBufferMaxSize,
       Object pusher,
-      String partitionWriterType);
+      String partitionWriterType,
+      String shuffleWriterType);

Review Comment:
   Maybe we can consider separating the JNI call for hash/sort shuffle in the 
future.



##########
cpp/core/shuffle/ShuffleReader.cc:
##########
@@ -16,6 +16,9 @@
  */
 
 #include "ShuffleReader.h"
+
+#include <jni/JniCommon.h>

Review Comment:
   Unintended change?



##########
cpp/velox/tests/VeloxShuffleWriterTest.cc:
##########
@@ -70,10 +72,17 @@ std::vector<ShuffleTestParams> createShuffleTestParams() {
   for (const auto& compression : compressions) {
     for (const auto compressionThreshold : compressionThresholds) {
       for (const auto mergeBufferSize : mergeBufferSizes) {
-        params.push_back(
-            ShuffleTestParams{PartitionWriterType::kLocal, compression, 
compressionThreshold, mergeBufferSize});
+        params.push_back(ShuffleTestParams{
+            ShuffleWriterType::kHashShuffle,
+            PartitionWriterType::kLocal,
+            compression,
+            compressionThreshold,
+            mergeBufferSize});
       }
-      params.push_back(ShuffleTestParams{PartitionWriterType::kRss, 
compression, compressionThreshold, 0});
+      params.push_back(ShuffleTestParams{
+          ShuffleWriterType::kHashShuffle, PartitionWriterType::kRss, 
compression, compressionThreshold, 0});
+      params.push_back(ShuffleTestParams{

Review Comment:
   Looks like the "compressionThreshold" is not used in the "sort + rss" code 
path. We can move this combination out of this loop to reduce the number of 
test cases.



##########
cpp/velox/utils/tests/VeloxShuffleWriterTestBase.h:
##########
@@ -321,10 +360,19 @@ class SinglePartitioningShuffleWriter : public 
VeloxShuffleWriterTest {
     static const uint32_t kNumPartitions = 1;
     auto partitionWriter = createPartitionWriter(
         GetParam().partitionWriterType, kNumPartitions, dataFile_, localDirs_, 
partitionWriterOptions_, arrowPool);
-    GLUTEN_ASSIGN_OR_THROW(
-        auto shuffleWriter,
-        VeloxShuffleWriter::create(
-            kNumPartitions, std::move(partitionWriter), 
std::move(shuffleWriterOptions_), pool_, arrowPool));
+    std::shared_ptr<VeloxShuffleWriter> shuffleWriter;
+    if (GetParam().shuffleWriterType == kHashShuffle) {
+      GLUTEN_ASSIGN_OR_THROW(
+          shuffleWriter,
+          VeloxHashBasedShuffleWriter::create(
+              kNumPartitions, std::move(partitionWriter), 
std::move(shuffleWriterOptions_), pool_, arrowPool));
+    } else if (
+        GetParam().shuffleWriterType == kSortShuffle && 
GetParam().partitionWriterType == PartitionWriterType::kRss) {
+      GLUTEN_ASSIGN_OR_THROW(
+          shuffleWriter,
+          VeloxSortBasedShuffleWriter::create(
+              kNumPartitions, std::move(partitionWriter), 
std::move(shuffleWriterOptions_), pool_, arrowPool));
+    }

Review Comment:
   Please extract this logic into a function to avoid copy-paste.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to