This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 947397e9994 [Bug](rf) fix rf of in filter cast data as different class 
type maybe return wrong result (#39026)
947397e9994 is described below

commit 947397e999429104ce941e13df1d7369f4077160
Author: zhangstar333 <[email protected]>
AuthorDate: Sat Aug 10 18:27:31 2024 +0800

    [Bug](rf) fix rf of in filter cast data as different class type maybe 
return wrong result (#39026)
    
    Two points have changed:
    1. In the batch_assign function:
    const std::string& string_value = column.stringval();
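    Calling **insert(&string_value)** passes a pointer to a std::string, but
    insert() reinterprets that pointer as a StringRef:
    reinterpret_cast<const **StringRef***>(data), which is a cast between
    unrelated class types and may produce wrong results;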
    ```
        void insert(const void* data) override {
            if (data == nullptr) {
                _contains_null = true;
                return;
            }
    
            const auto* value = reinterpret_cast<const StringRef*>(data);
            std::string str_value(value->data, value->size);
            _set.insert(str_value);
        }
    ```
    
    2. In the batch_copy function, void_value is cast to const T*,
    but the value returned by it->get_value() is a StringRef, so T needs to be
    changed to StringRef
    ```
    template <typename T>
    void batch_copy(PInFilter* filter, HybridSetBase::IteratorBase* it,
                    void (*set_func)(PColumnValue*, const T*)) {
        while (it->has_next()) {
            const void* void_value = it->get_value();
            auto origin_value = reinterpret_cast<const T*>(void_value);
            set_func(filter->add_values(), origin_value);
            it->next();
        }
    }
    ```
---
 be/src/exprs/runtime_filter.cpp                              | 12 ++++++++----
 regression-test/data/query_p0/join/test_runtimefilter_2.out  |  9 +++++++++
 .../suites/query_p0/join/test_runtimefilter_2.groovy         | 11 +++++++++++
 3 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp
index c6fd3338b14..5a241326f90 100644
--- a/be/src/exprs/runtime_filter.cpp
+++ b/be/src/exprs/runtime_filter.cpp
@@ -694,8 +694,10 @@ public:
         case TYPE_CHAR:
         case TYPE_STRING: {
             batch_assign(in_filter, [](std::shared_ptr<HybridSetBase>& set, 
PColumnValue& column) {
-                const auto& string_val_ref = column.stringval();
-                set->insert(&string_val_ref);
+                const std::string& string_value = column.stringval();
+                // string_value is std::string, call insert(data, size) 
function in StringSet will not cast as StringRef
+                // so could avoid some cast error at different class object.
+                set->insert((void*)string_value.data(), string_value.size());
             });
             break;
         }
@@ -1630,8 +1632,10 @@ void IRuntimeFilter::to_protobuf(PInFilter* filter) {
     case TYPE_CHAR:
     case TYPE_VARCHAR:
     case TYPE_STRING: {
-        batch_copy<std::string>(filter, it, [](PColumnValue* column, const 
std::string* value) {
-            column->set_stringval(*value);
+        //const void* void_value = it->get_value();
+        //Now the get_value return void* is StringRef
+        batch_copy<StringRef>(filter, it, [](PColumnValue* column, const 
StringRef* value) {
+            column->set_stringval(value->to_string());
         });
         return;
     }
diff --git a/regression-test/data/query_p0/join/test_runtimefilter_2.out 
b/regression-test/data/query_p0/join/test_runtimefilter_2.out
index d6cc7fc59a0..005406e6793 100644
--- a/regression-test/data/query_p0/join/test_runtimefilter_2.out
+++ b/regression-test/data/query_p0/join/test_runtimefilter_2.out
@@ -2,3 +2,12 @@
 -- !select_1 --
 aaa
 
+-- !select_2 --
+aaa
+
+-- !select_3 --
+BSDSAE1018     1       1       true    BSDSAE1018      1       true    true
+
+-- !select_4 --
+2      3       BSDSAE1018
+
diff --git a/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy 
b/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy
index 6e6e57c6c2d..50a61a366b1 100644
--- a/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy
+++ b/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy
@@ -30,4 +30,15 @@
      qt_select_1 """
             select     "aaa" FROM     t_ods_tpisyncjpa4_2 tpisyncjpa4     
inner join (         SELECT             USER_ID,             MAX(INTERNAL_CODE) 
as INTERNAL_CODE         FROM             t_ods_tpisyncjpa4_2         WHERE     
        STATE_ID = '1'         GROUP BY             USER_ID     ) jpa4 on 
tpisyncjpa4.USER_ID = jpa4.USER_ID;
      """
+     sql """set runtime_filter_type='IN';"""
+     qt_select_2 """
+            select     "aaa" FROM     t_ods_tpisyncjpa4_2 tpisyncjpa4     
inner join (         SELECT             USER_ID,             MAX(INTERNAL_CODE) 
as INTERNAL_CODE         FROM             t_ods_tpisyncjpa4_2         WHERE     
        STATE_ID = '1'         GROUP BY             USER_ID     ) jpa4 on 
tpisyncjpa4.USER_ID = jpa4.USER_ID;
+     """
+     qt_select_3 """
+            select *, tpisyncjpp1.POST_ID=jpp1.POST_ID, 
tpisyncjpp1.INTERNAL_CODE=jpp1.INTERNAL_CODE from ( select 
tpisyncjpp1.POST_ID,tpisyncjpp1.INTERNAL_CODE as INTERNAL_CODE, 
tpisyncjpp1.STATE_ID, tpisyncjpp1.STATE_ID  ='1' from ( select tpisyncjpa4.* 
from t_ods_tpisyncjpa4_2  tpisyncjpa4 inner  join [broadcast]       (           
  SELECT                 USER_ID,                 MAX(INTERNAL_CODE)  as  
INTERNAL_CODE             FROM                t_ods_tpisyncjpa4_2               
   [...]
+     """
+     qt_select_4 """
+        select DISTINCT         tpisyncjpa4.USER_ID as USER_ID,            
tpisyncjpa4.USER_NAME as USER_NAME,       tpisyncjpp1.POST_ID AS "T4_POST_ID"   
FROM t_ods_tpisyncjpa4_2 tpisyncjpa4 cross join [shuffle]       
t_ods_tpisyncjpp1_2 tpisyncjpp1            inner join            (       SELECT 
        USER_ID,         MAX(INTERNAL_CODE) as INTERNAL_CODE       FROM        
t_ods_tpisyncjpa4_2              WHERE         STATE_ID = '1'       GROUP BY    
     USER_ID     )jpa4         on  [...]
+     """
+    
  }
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to