save-buffer commented on code in PR #12289:
URL: https://github.com/apache/arrow/pull/12289#discussion_r859076718


##########
cpp/src/arrow/compute/exec/hash_join_node_test.cc:
##########
@@ -1900,5 +1903,150 @@ TEST(HashJoin, TrivialResidualFilter) {
   }
 }
 
+HashJoinNodeOptions GenerateHashJoinNodeOptions(Random64Bit& rng, int 
num_left_cols,
+                                                int num_right_cols) {
+  HashJoinNodeOptions opts;
+  opts.join_type = static_cast<JoinType>(rng.from_range(0, 7));
+  bool is_left_join = opts.join_type == JoinType::LEFT_SEMI ||
+                      opts.join_type == JoinType::LEFT_ANTI ||
+                      opts.join_type == JoinType::LEFT_OUTER;
+  bool is_right_join = opts.join_type == JoinType::RIGHT_SEMI ||
+                       opts.join_type == JoinType::RIGHT_ANTI ||
+                       opts.join_type == JoinType::RIGHT_OUTER;
+
+  int num_keys = rng.from_range(1, std::min(num_left_cols, num_right_cols));
+  for (int i = 0; i < num_left_cols; i++) {
+    bool is_out = rng.from_range(0, 2) != 2;

Review Comment:
   The higher the chance of a probe-side output, the more columns we 
can use for Bloom filters. I guess it's pretty arbitrary, but I wanted it to be 
slightly higher. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to