Copilot commented on code in PR #59832:
URL: https://github.com/apache/doris/pull/59832#discussion_r2703010081


##########
be/src/pipeline/dependency.h:
##########
@@ -622,6 +622,12 @@ struct HashJoinSharedState : public JoinSharedState {
     // memory in `_hash_table_variants`. So before execution, we should use a 
local _hash_table_variants
     // which has a shared hash table in it.
     std::vector<std::shared_ptr<JoinDataVariants>> hash_table_variant_vector;
+
+    // wheather left semi join could directly return

Review Comment:
   Typo in comment: "wheather" should be "whether".
   ```suggestion
       // whether left semi join could directly return
   ```



##########
regression-test/suites/query_p0/join/test_left_join1.groovy:
##########
@@ -39,4 +39,23 @@ suite("test_left_join1", "query,p0,arrow_flight_sql") {
                   ORDER BY
                   a.f_key; 
              """
+
+    sql "set runtime_filter_type=1,runtime_filter_max_in_num=0;"
+
+       qt_select """ SELECT
+                                                       *
+                                                       FROM
+                                       ( SELECT f_key, f_value FROM 
${tableName} ) a
+                                       LEFT JOIN ( SELECT f_key, f_value FROM 
${tableName} ) b ON a.f_key = b.f_key
+                                       LEFT JOIN (
+                                                       SELECT
+                                       *
+                                       FROM
+                                       ${tableName}
+                                       WHERE
+                                       f_key IN ( SELECT f_key FROM 
${tableName} WHERE f_key IN ( SELECT f_key FROM ${tableName} WHERE f_value > 
123 ) )
+                                       ) c ON a.f_key = c.f_key
+                                       ORDER BY
+                                       a.f_key; 
+                       """

Review Comment:
   Inconsistent indentation: The query uses tabs for indentation starting at 
line 45, while the rest of the file uses spaces. This should be standardized to 
match the file's existing style (spaces).
   ```suggestion
       qt_select """ SELECT
                             *
                             FROM
                     ( SELECT f_key, f_value FROM ${tableName} ) a
                     LEFT JOIN ( SELECT f_key, f_value FROM ${tableName} ) b ON 
a.f_key = b.f_key
                     LEFT JOIN (
                             SELECT
                     *
                     FROM
                     ${tableName}
                     WHERE
                     f_key IN ( SELECT f_key FROM ${tableName} WHERE f_key IN ( 
SELECT f_key FROM ${tableName} WHERE f_value > 123 ) )
                     ) c ON a.f_key = c.f_key
                     ORDER BY
                     a.f_key; 
                """
   ```



##########
be/src/pipeline/exec/hashjoin_probe_operator.cpp:
##########
@@ -283,8 +289,11 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* 
state, vectorized::Bloc
     }
 
     local_state._estimate_memory_usage += temp_block.allocated_bytes();
-    RETURN_IF_ERROR(
-            local_state.filter_data_and_build_output(state, output_block, eos, 
&temp_block));
+    // if left_semi_direct_return is true,
+    // here dose not increase the output rows count(just same as 
`_probe_block`'s rows count).

Review Comment:
   Typo in comment: "dose" should be "does".
   ```suggestion
       // here does not increase the output rows count(just same as 
`_probe_block`'s rows count).
   ```



##########
fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java:
##########
@@ -1373,6 +1375,9 @@ public enum IgnoreSplitType {
     @VariableMgr.VarAttr(name = ENABLE_CONDITION_CACHE)
     public boolean enableConditionCache = true;
 
+    @VariableMgr.VarAttr(name = ENABLE_LEFT_SEMI_DIRECT_RETRUN_OPT)

Review Comment:
   Same typo in the VarAttr annotation: "RETRUN" should be "RETURN". This 
should match the corrected constant name "ENABLE_LEFT_SEMI_DIRECT_RETURN_OPT".
   ```suggestion
       @VariableMgr.VarAttr(name = ENABLE_LEFT_SEMI_DIRECT_RETURN_OPT)
   ```



##########
be/src/olap/column_predicate.h:
##########
@@ -327,8 +327,10 @@ class ColumnPredicate : public 
std::enable_shared_from_this<ColumnPredicate> {
     void attach_profile_counter(
             int filter_id, std::shared_ptr<RuntimeProfile::Counter> 
predicate_filtered_rows_counter,
             std::shared_ptr<RuntimeProfile::Counter> 
predicate_input_rows_counter,
-            std::shared_ptr<RuntimeProfile::Counter> 
predicate_always_true_rows_counter) {
+            std::shared_ptr<RuntimeProfile::Counter> 
predicate_always_true_rows_counter,
+            const RuntimeFilterSelectivity& rf_selectivity) {
         _runtime_filter_id = filter_id;
+        _rf_selectivity = rf_selectivity;

Review Comment:
   Potential issue: The RuntimeFilterSelectivity object is being passed by 
const reference and then assigned to a mutable member variable. This creates a 
copy of the object, not a reference. This means changes to _rf_selectivity 
won't be reflected in the original object passed in. Consider storing a pointer 
or reference instead of copying the object, or ensure this copy-by-value 
behavior is intentional. If the intent is to share state, this will not work 
correctly.



##########
be/src/runtime_filter/runtime_filter_producer.h:
##########
@@ -131,6 +131,13 @@ class RuntimeFilterProducer : public RuntimeFilter {
         _wrapper = wrapper;
     }
 
+    bool detect_in_filter() {
+        if (_has_remote_target) {
+            return false;
+        }

Review Comment:
   Potential concurrency issue: The detect_in_filter method accesses _wrapper 
without holding the mutex (_rmtx), while other methods like set_wrapper and 
wrapper() properly acquire the lock. This could lead to race conditions if 
detect_in_filter is called concurrently with set_wrapper.
   ```suggestion
           }
           std::unique_lock<std::recursive_mutex> l(_rmtx);
   ```



##########
be/src/runtime_filter/runtime_filter_wrapper.h:
##########
@@ -120,6 +122,17 @@ class RuntimeFilterWrapper {
         }
     }
 
+    bool detect_in_filter() {
+        if (get_real_type() != RuntimeFilterType::IN_FILTER) {
+            return false;
+        }
+        if (_state != State::READY) {
+            return false;
+        }
+        _detected_in_filter = true;
+        return true;
+    }

Review Comment:
   Potential concurrency issue: The detect_in_filter method reads _state (which 
is atomic) but then writes to _detected_in_filter (which is not atomic) without 
synchronization. According to the comment at line 159-161, the wrapper may be 
shared by producer and consumer, so concurrent access is possible. The 
_detected_in_filter member should be std::atomic<bool> to ensure thread-safe 
access.



##########
fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java:
##########
@@ -302,6 +302,8 @@ public class SessionVariable implements Serializable, 
Writable {
 
     public static final String ENABLE_SHARED_SCAN = "enable_shared_scan";
 
+    public static final String ENABLE_LEFT_SEMI_DIRECT_RETRUN_OPT = 
"enable_left_semi_direct_return_opt";

Review Comment:
   Typo in constant name: "RETRUN" should be "RETURN". The constant name should 
be "ENABLE_LEFT_SEMI_DIRECT_RETURN_OPT" to match the actual session variable 
name and maintain consistency.
   ```suggestion
       public static final String ENABLE_LEFT_SEMI_DIRECT_RETURN_OPT = 
"enable_left_semi_direct_return_opt";
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to