This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 56ccb9a6577 [fix](parquet) fix parquet reader missing column and filter missing column (#36182)
56ccb9a6577 is described below

commit 56ccb9a657761d08fa216e741443c899e122843b
Author: Ashin Gau <[email protected]>
AuthorDate: Thu Jun 13 21:30:05 2024 +0800

    [fix](parquet) fix parquet reader missing column and filter missing column (#36182)
    
    bp #36189
---
 be/src/vec/exec/format/parquet/vparquet_group_reader.cpp | 4 ++++
 be/src/vec/exec/format/parquet/vparquet_group_reader.h   | 1 +
 be/src/vec/exec/format/parquet/vparquet_reader.cpp       | 8 ++++++++
 3 files changed, 13 insertions(+)

diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 807f016cb43..5e824f34817 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -174,6 +174,10 @@ Status RowGroupReader::init(
             }
         }
     }
+    // Append the missing-column conjuncts so that predicates on a missing column are checked,
+    // e.g. `missing column == xx`, `missing column is null`, `missing column is not null`.
+    _filter_conjuncts.insert(_filter_conjuncts.end(),
+                             _lazy_read_ctx.missing_columns_conjuncts.begin(),
+                             _lazy_read_ctx.missing_columns_conjuncts.end());
     RETURN_IF_ERROR(_rewrite_dict_predicates());
     return Status::OK();
 }
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.h 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.h
index d38f5a74adf..d9f7f2dbf34 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.h
@@ -94,6 +94,7 @@ public:
         std::unordered_map<std::string, std::tuple<std::string, const 
SlotDescriptor*>>
                 partition_columns;
         std::unordered_map<std::string, VExprContextSPtr> 
predicate_missing_columns;
+        VExprContextSPtrs missing_columns_conjuncts;
         // lazy read missing columns or all missing columns
         std::unordered_map<std::string, VExprContextSPtr> missing_columns;
         // should turn off filtering by page index, lazy read and dict filter 
if having complex type
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index 18610a1f655..f99786dc6e2 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -426,6 +426,14 @@ Status ParquetReader::set_fill_columns(
         if (iter == predicate_columns.end()) {
             _lazy_read_ctx.missing_columns.emplace(kv.first, kv.second);
         } else {
+            // Collect the filter conjuncts on this missing column so that predicates on it are
+            // checked, e.g. `missing column == xx`, `missing column is null`,
+            // `missing column is not null`.
+            if (_slot_id_to_filter_conjuncts->find(iter->second.second) !=
+                _slot_id_to_filter_conjuncts->end()) {
+                for (auto& ctx : 
_slot_id_to_filter_conjuncts->find(iter->second.second)->second) {
+                    _lazy_read_ctx.missing_columns_conjuncts.emplace_back(ctx);
+                }
+            }
+
             _lazy_read_ctx.predicate_missing_columns.emplace(kv.first, 
kv.second);
             
_lazy_read_ctx.all_predicate_col_ids.emplace_back(iter->second.first);
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to