This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 029ee014baf branch-4.0: [fix](parquet)fix parquet reader lazy 
materialization cannot filter. #60474 (#60685)
029ee014baf is described below

commit 029ee014bafd11783726df0bd2da3a8e6fb0fd7c
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Mar 3 17:57:32 2026 +0800

    branch-4.0: [fix](parquet)fix parquet reader lazy materialization cannot 
filter. #60474 (#60685)
    
    Cherry-picked from #60474
    
    Co-authored-by: daidai <[email protected]>
---
 be/src/vec/exec/scan/file_scanner.cpp              |  14 +-
 be/src/vec/exec/scan/file_scanner.h                |   3 +-
 .../hive/test_orc_lazy_mat_profile.groovy          | 310 +++++++++++
 .../hive/test_parquet_lazy_mat_profile.groovy      | 604 +++++++++++++++++++++
 4 files changed, 920 insertions(+), 11 deletions(-)

diff --git a/be/src/vec/exec/scan/file_scanner.cpp 
b/be/src/vec/exec/scan/file_scanner.cpp
index be471a85438..4b9535df259 100644
--- a/be/src/vec/exec/scan/file_scanner.cpp
+++ b/be/src/vec/exec/scan/file_scanner.cpp
@@ -357,15 +357,11 @@ Status FileScanner::_process_conjuncts() {
     return Status::OK();
 }
 
-Status FileScanner::_process_late_arrival_conjuncts(bool* changed,
-                                                    VExprContextSPtrs& 
new_push_down_conjuncts) {
-    *changed = false;
+Status FileScanner::_process_late_arrival_conjuncts() {
     if (_push_down_conjuncts.size() < _conjuncts.size()) {
-        *changed = true;
         _push_down_conjuncts = _conjuncts;
         _conjuncts.clear();
         RETURN_IF_ERROR(_process_conjuncts());
-        new_push_down_conjuncts = _push_down_conjuncts;
     }
     if (_applied_rf_num == _total_rf_num) {
         
_local_state->scanner_profile()->add_info_string("ApplyAllRuntimeFilters", 
"True");
@@ -1045,7 +1041,9 @@ Status FileScanner::_get_next_reader() {
             // ATTN: the push down agg type may be set back to NONE,
             // see IcebergTableReader::init_row_filters for example.
             parquet_reader->set_push_down_agg_type(_get_push_down_agg_type());
-
+            if (push_down_predicates) {
+                RETURN_IF_ERROR(_process_late_arrival_conjuncts());
+            }
             RETURN_IF_ERROR(_init_parquet_reader(std::move(parquet_reader), 
file_meta_cache_ptr));
 
             need_to_get_parsed_schema = true;
@@ -1066,9 +1064,7 @@ Status FileScanner::_get_next_reader() {
 
             orc_reader->set_push_down_agg_type(_get_push_down_agg_type());
             if (push_down_predicates) {
-                bool changed = false;
-                VExprContextSPtrs new_push_down_conjuncts;
-                RETURN_IF_ERROR(_process_late_arrival_conjuncts(&changed, 
new_push_down_conjuncts));
+                RETURN_IF_ERROR(_process_late_arrival_conjuncts());
             }
             RETURN_IF_ERROR(_init_orc_reader(std::move(orc_reader), 
file_meta_cache_ptr));
 
diff --git a/be/src/vec/exec/scan/file_scanner.h 
b/be/src/vec/exec/scan/file_scanner.h
index bc8149fd3b2..f8b68200806 100644
--- a/be/src/vec/exec/scan/file_scanner.h
+++ b/be/src/vec/exec/scan/file_scanner.h
@@ -253,8 +253,7 @@ private:
     void _init_runtime_filter_partition_prune_block();
     Status _process_runtime_filters_partition_prune(bool& is_partition_pruned);
     Status _process_conjuncts();
-    Status _process_late_arrival_conjuncts(bool* changed,
-                                           VExprContextSPtrs& 
new_push_down_conjuncts);
+    Status _process_late_arrival_conjuncts();
     void _get_slot_ids(VExpr* expr, std::vector<int>* slot_ids);
     Status _generate_truncate_columns(bool need_to_get_parsed_schema);
     Status _set_fill_or_truncate_columns(bool need_to_get_parsed_schema);
diff --git 
a/regression-test/suites/external_table_p0/hive/test_orc_lazy_mat_profile.groovy
 
b/regression-test/suites/external_table_p0/hive/test_orc_lazy_mat_profile.groovy
new file mode 100644
index 00000000000..c7ffa7aaf5d
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/hive/test_orc_lazy_mat_profile.groovy
@@ -0,0 +1,310 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import groovy.json.JsonSlurper
+
+suite("test_orc_lazy_mat_profile", 
"p0,external,hive,external_docker,external_docker_hive") {
+    def getProfileList = {
+        def dst = 'http://' + context.config.feHttpAddress
+        def conn = new URL(dst + "/rest/v1/query_profile").openConnection()
+        conn.setRequestMethod("GET")
+        def encoding = 
Base64.getEncoder().encodeToString((context.config.feHttpUser + ":" +
+                (context.config.feHttpPassword == null ? "" : 
context.config.feHttpPassword)).getBytes("UTF-8"))
+        conn.setRequestProperty("Authorization", "Basic ${encoding}")
+        return conn.getInputStream().getText()
+    }
+
+    def getProfile = { id ->
+        def dst = 'http://' + context.config.feHttpAddress
+        def conn = new URL(dst + 
"/api/profile/text/?query_id=$id").openConnection()
+        conn.setRequestMethod("GET")
+        def encoding = 
Base64.getEncoder().encodeToString((context.config.feHttpUser + ":" +
+                (context.config.feHttpPassword == null ? "" : 
context.config.feHttpPassword)).getBytes("UTF-8"))
+        conn.setRequestProperty("Authorization", "Basic ${encoding}")
+        return conn.getInputStream().getText()
+    }
+
+    def getProfileWithToken = { token ->
+        String profileId = ""
+        int attempts = 0
+        while (attempts < 10 && (profileId == null || profileId == "")) {
+            List profileData = new 
JsonSlurper().parseText(getProfileList()).data.rows
+            for (def profileItem in profileData) {
+                if (profileItem["Sql Statement"].toString().contains(token)) {
+                    profileId = profileItem["Profile ID"].toString()
+                    break
+                }
+            }
+            if (profileId == null || profileId == "") {
+                Thread.sleep(300)
+            }
+            attempts++
+        }
+        assertTrue(profileId != null && profileId != "")
+        Thread.sleep(800)
+        return getProfile(profileId).toString()
+    }
+
+    def extractProfileBlockMetrics = {String profileText, String blockName ->
+        List<String> lines = profileText.readLines()
+
+        Map<String, String> metrics = [:]
+        boolean inBlock = false
+        int blockIndent = -1
+
+        lines.each { line ->
+            if (!inBlock) {
+                def m = line =~ /^(\s*)-\s+${Pattern.quote(blockName)}:/
+                if (m.find()) {
+                    inBlock = true
+                    blockIndent = m.group(1).length()
+                }
+            } else {
+                // 当前行缩进
+                def indent = (line =~ /^(\s*)/)[0][1].length()
+
+                if (indent > blockIndent) {
+                    def kv = line =~ /^\s*-\s*([^:]+):\s*(.+)$/
+                    if (kv.matches()) {
+                        metrics[kv[0][1].trim()] = kv[0][2].trim()
+                    }
+                } else {
+                    // 缩进回退,block 结束
+                    inBlock = false
+                }
+            }
+        }
+
+        return metrics
+    }
+
+    def extractProfileValue =  { String profileText, String keyName -> 
+        def matcher = profileText =~ /(?m)^\s*-\s*${keyName}:\s*(.+)$/
+        return matcher.find() ? matcher.group(1).trim() : null
+    }
+
+    // session vars
+    sql "unset variable all;"
+    sql "set profile_level=2;"
+    sql "set enable_profile=true;"
+    sql " set parallel_pipeline_task_num = 1;"
+    sql " set file_split_size = 10000000;"
+    sql """set max_file_scanners_concurrency =  1; """
+
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (!"true".equalsIgnoreCase(enabled)) {
+        return;
+    }
+
+    for (String hivePrefix : ["hive2"]) {
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String hmsPort = context.config.otherConfigs.get(hivePrefix + 
"HmsPort")
+        String catalog_name = "test_orc_lazy_mat_profile"
+
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+        
+        sql """ use `global_lazy_mat_db` """
+
+        def q1 = {
+            def t1 = UUID.randomUUID().toString()
+            
+            def sql_result = sql """
+                select *, "${t1}" from orc_topn_lazy_mat_table where file_id = 
1 and id = 1;
+            """
+            logger.info("sql_result = ${sql_result}");
+            return getProfileWithToken(t1);             
+        }
+
+
+        def q2 = {
+            def t1 = UUID.randomUUID().toString()
+            
+            def sql_result = sql """
+                select *, "${t1}" from orc_topn_lazy_mat_table where file_id = 
1 and id <= 2;
+            """
+            logger.info("sql_result = ${sql_result}");
+            return getProfileWithToken(t1);             
+        }
+
+        def q3 = {
+            def t1 = UUID.randomUUID().toString()
+            
+            def sql_result = sql """
+                select *, "${t1}" from orc_topn_lazy_mat_table where file_id = 
1 and id <= 3;
+            """
+            logger.info("sql_result = ${sql_result}");
+            return getProfileWithToken(t1);             
+        }
+
+        def q4 = {
+            def t1 = UUID.randomUUID().toString()
+            
+            def sql_result = sql """
+                select *, "${t1}" from orc_topn_lazy_mat_table where file_id = 
1 and id < 0;
+            """
+            logger.info("sql_result = ${sql_result}");
+            return getProfileWithToken(t1);             
+        }
+
+        def test_true_true = {
+            sql " set enable_orc_filter_by_min_max = true; "
+            sql " set enable_orc_lazy_materialization = true; "
+
+            def profileStr = q1()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("2", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("3", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("1", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+
+            profileStr = q2()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("1", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("3", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("1", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+
+            profileStr = q3()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("0", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("3", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("1", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+
+            profileStr = q4()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("0", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("3", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("0", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+        }
+        test_true_true();
+
+
+        def test_true_false = {
+            sql " set enable_orc_filter_by_min_max = true; "
+            sql " set enable_orc_lazy_materialization = false; "
+
+            def profileStr = q1()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("0", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("3", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("1", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+
+            profileStr = q2()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("0", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("3", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("1", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+
+            profileStr = q3()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("0", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("3", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("1", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+
+            profileStr = q4()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("0", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("3", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("0", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+        }
+        test_true_false();
+
+
+        def test_false_false = {
+            sql " set enable_orc_filter_by_min_max = false; "
+            sql " set enable_orc_lazy_materialization = false; "
+
+            def profileStr = q1()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("0", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("0", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("0", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+
+            profileStr = q2()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("0", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("0", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("0", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+
+            profileStr = q3()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("0", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("0", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("0", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+
+            profileStr = q4()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("0", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("0", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("0", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+        }
+        test_false_false();
+
+
+
+        def test_false_true = {
+            sql " set enable_orc_filter_by_min_max = false; "
+            sql " set enable_orc_lazy_materialization = true; "
+
+            def profileStr = q1()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("8", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("0", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("0", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+
+            profileStr = q2()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("7", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("0", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("0", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+
+            profileStr = q3()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("6", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("0", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("0", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+
+
+            profileStr = q4()
+            logger.info("profileStr = \n${profileStr}");
+            assertEquals("9", extractProfileValue(profileStr, 
"FilteredRowsByLazyRead"))
+            assertEquals("0", extractProfileValue(profileStr, 
"EvaluatedRowGroupCount"))
+            assertEquals("0", extractProfileValue(profileStr, 
"SelectedRowGroupCount"))
+        }
+        test_false_true();
+
+
+
+
+
+
+        sql """drop catalog ${catalog_name};"""
+    }
+
+
+
+
+  
+}
diff --git 
a/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy
 
b/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy
new file mode 100644
index 00000000000..3cfdccaa41b
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy
@@ -0,0 +1,604 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import java.util.regex.Pattern
+import groovy.json.JsonSlurper
+
+suite("test_parquet_lazy_mat_profile", 
"p0,external,hive,external_docker,external_docker_hive") {
+
+
+    def getProfileList = {
+        def dst = 'http://' + context.config.feHttpAddress
+        def conn = new URL(dst + "/rest/v1/query_profile").openConnection()
+        conn.setRequestMethod("GET")
+        def encoding = 
Base64.getEncoder().encodeToString((context.config.feHttpUser + ":" +
+                (context.config.feHttpPassword == null ? "" : 
context.config.feHttpPassword)).getBytes("UTF-8"))
+        conn.setRequestProperty("Authorization", "Basic ${encoding}")
+        return conn.getInputStream().getText()
+    }
+
+    def getProfile = { id ->
+        def dst = 'http://' + context.config.feHttpAddress
+        def conn = new URL(dst + 
"/api/profile/text/?query_id=$id").openConnection()
+        conn.setRequestMethod("GET")
+        def encoding = 
Base64.getEncoder().encodeToString((context.config.feHttpUser + ":" +
+                (context.config.feHttpPassword == null ? "" : 
context.config.feHttpPassword)).getBytes("UTF-8"))
+        conn.setRequestProperty("Authorization", "Basic ${encoding}")
+        return conn.getInputStream().getText()
+    }
+
+    def getProfileWithToken = { token ->
+        String profileId = ""
+        int attempts = 0
+        while (attempts < 10 && (profileId == null || profileId == "")) {
+            List profileData = new 
JsonSlurper().parseText(getProfileList()).data.rows
+            for (def profileItem in profileData) {
+                if (profileItem["Sql Statement"].toString().contains(token)) {
+                    profileId = profileItem["Profile ID"].toString()
+                    break
+                }
+            }
+            if (profileId == null || profileId == "") {
+                Thread.sleep(300)
+            }
+            attempts++
+        }
+        assertTrue(profileId != null && profileId != "")
+        Thread.sleep(800)
+        return getProfile(profileId).toString()
+    }
+
+    def extractProfileBlockMetrics = {String profileText, String blockName ->
+        List<String> lines = profileText.readLines()
+
+        Map<String, String> metrics = [:]
+        boolean inBlock = false
+        int blockIndent = -1
+
+        lines.each { line ->
+            if (!inBlock) {
+                def m = line =~ /^(\s*)-\s+${Pattern.quote(blockName)}:/
+                if (m.find()) {
+                    inBlock = true
+                    blockIndent = m.group(1).length()
+                }
+            } else {
+                // 当前行缩进
+                def indent = (line =~ /^(\s*)/)[0][1].length()
+
+                if (indent > blockIndent) {
+                    def kv = line =~ /^\s*-\s*([^:]+):\s*(.+)$/
+                    if (kv.matches()) {
+                        metrics[kv[0][1].trim()] = kv[0][2].trim()
+                    }
+                } else {
+                    // 缩进回退,block 结束
+                    inBlock = false
+                }
+            }
+        }
+
+        return metrics
+    }
+
+    def extractProfileValue =  { String profileText, String keyName -> 
+        def matcher = profileText =~ /(?m)^\s*-\s*${keyName}:\s*(.+)$/
+        return matcher.find() ? matcher.group(1).trim() : null
+    }
+
+    // session vars
+    sql "unset variable all;"
+    sql "set profile_level=2;"
+    sql "set enable_profile=true;"
+    sql " set parallel_pipeline_task_num = 1;"
+    sql " set file_split_size = 10000000;"
+    sql """set max_file_scanners_concurrency =  1; """
+
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (!"true".equalsIgnoreCase(enabled)) {
+        return;
+    }
+    for (String hivePrefix : ["hive2"]) {
+        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+        String hmsPort = context.config.otherConfigs.get(hivePrefix + 
"HmsPort")
+        String catalog_name = "test_parquet_lazy_mat_profile"
+
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+        
+        sql """ use `default` """
+
+        
+        // fact_big : only one data file, 100 rows, 100 row groups, per row 
group 1 row, k from 1 to 100
+        def q1 = {
+            def t1 = UUID.randomUUID().toString()
+            def sql_result = sql """
+                select *, "${t1}" from fact_big where k = 1;
+            """
+            logger.info("sql_result = ${sql_result}");
+
+            def profileText = getProfileWithToken(t1);
+            assertTrue(profileText.contains("ParquetReader"), "Profile does 
not contain ParquetReader")
+            return extractProfileBlockMetrics(profileText, "ParquetReader")
+        }
+
+        def q2  = {
+            def t1 = UUID.randomUUID().toString()
+            def sql_result = sql """
+                select *, "${t1}" from fact_big where k = 20;
+            """
+            logger.info("sql_result = ${sql_result}");
+
+            def profileText = getProfileWithToken(t1)
+            assertTrue(profileText.contains("ParquetReader"), "Profile does 
not contain ParquetReader")
+            return extractProfileBlockMetrics(profileText, "ParquetReader")
+        }
+
+
+
+            
+        def q3 = {
+            def t1 = UUID.randomUUID().toString()
+            def sql_result = sql """
+                select *, "${t1}" from fact_big where k = 1100;
+            """
+            logger.info("sql_result = ${sql_result}");
+
+            def profileText = getProfileWithToken(t1)
+            assertTrue(profileText.contains("ParquetReader"), "Profile does 
not contain ParquetReader")
+            return extractProfileBlockMetrics(profileText, "ParquetReader")
+        }
+
+
+
+
+
+
+        // only one data file, 7300 rows, 1 rows groups, id column 325 pages, 
per page 27/21 rows 
+        def q4 = {
+                        def t1 = UUID.randomUUID().toString()
+            def sql_result = sql """
+                select * ,"${t1}" from   alltypes_tiny_pages_plain where id = 
1;
+            """
+            logger.info("sql_result = ${sql_result}");
+
+            def profileText = getProfileWithToken(t1)
+            assertTrue(profileText.contains("ParquetReader"), "Profile does 
not contain ParquetReader")
+            return extractProfileBlockMetrics(profileText, "ParquetReader")
+        }
+
+
+
+
+        def q5 = {
+            def t1 = UUID.randomUUID().toString()
+            def sql_result = sql """
+                select * ,"${t1}" from   alltypes_tiny_pages_plain where id <= 
13;
+            """
+            logger.info("sql_result = ${sql_result}");
+
+            def profileText = getProfileWithToken(t1)
+            assertTrue(profileText.contains("ParquetReader"), "Profile does 
not contain ParquetReader")
+            return extractProfileBlockMetrics(profileText, "ParquetReader")
+        }
+
+        def q6 = {
+                        def t1 = UUID.randomUUID().toString()
+            def sql_result = sql """
+                select * ,"${t1}" from   alltypes_tiny_pages_plain where id >= 
1 ;
+            """
+            logger.info("sql_result = ${sql_result}");
+
+            def profileText = getProfileWithToken(t1)
+            assertTrue(profileText.contains("ParquetReader"), "Profile does 
not contain ParquetReader")
+            return extractProfileBlockMetrics(profileText, "ParquetReader")
+        }
+
+
+
+        def q7 = {
+                        def t1 = UUID.randomUUID().toString()
+            def sql_result = sql """
+                select * ,"${t1}" from   alltypes_tiny_pages_plain where id in 
(1,2);
+            """
+            logger.info("sql_result = ${sql_result}");  
+            def profileText = getProfileWithToken(t1)
+            assertTrue(profileText.contains("ParquetReader"), "Profile does 
not contain ParquetReader")
+            return extractProfileBlockMetrics(profileText, "ParquetReader")
+        }
+
+
+
+        def test_true_true = {
+            sql """ set enable_parquet_filter_by_min_max = true; """
+            sql """ set enable_parquet_lazy_materialization = true; """
+
+            def metrics = q1()
+            logger.info("metrics = ${metrics}")
+            assertEquals("99", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("1", metrics["RawRowsRead"])
+            assertEquals("99", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("99", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("100", metrics["RowGroupsTotalNum"])
+
+            metrics = q2()
+            logger.info("metrics = ${metrics}")
+            assertEquals("99", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("1", metrics["RawRowsRead"])
+            assertEquals("99", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("99", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("100", metrics["RowGroupsTotalNum"])
+
+            metrics = q3()
+            logger.info("metrics = ${metrics}")
+            assertEquals("100", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("0", metrics["RawRowsRead"])
+            assertEquals("100", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("100", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("0", metrics["RowGroupsReadNum"])
+            assertEquals("100", metrics["RowGroupsTotalNum"])
+
+            metrics = q4()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("20", metrics["FilteredRowsByLazyRead"])
+            assertEquals("7.279K (7279)", metrics["FilteredRowsByPage"])
+            assertEquals("21", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+
+            metrics = q5()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("28", metrics["FilteredRowsByLazyRead"])
+            assertEquals("7.258K (7258)", metrics["FilteredRowsByPage"])
+            assertEquals("42", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+
+            metrics = q6()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("1", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertTrue(metrics["RawRowsRead"].contains("7300"))
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+
+            metrics = q7()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("19", metrics["FilteredRowsByLazyRead"])
+            assertEquals("7.279K (7279)", metrics["FilteredRowsByPage"])
+            assertEquals("21", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+        }
+
+
+        def test_true_false = {
+            sql """ set enable_parquet_filter_by_min_max = true; """
+            sql """ set enable_parquet_lazy_materialization = false; """
+
+            def metrics = q1()
+            logger.info("metrics = ${metrics}")
+            assertEquals("99", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("1", metrics["RawRowsRead"])
+            assertEquals("99", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("99", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("100", metrics["RowGroupsTotalNum"])
+
+            metrics = q2()
+            logger.info("metrics = ${metrics}")
+            assertEquals("99", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("1", metrics["RawRowsRead"])
+            assertEquals("99", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("99", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("100", metrics["RowGroupsTotalNum"])
+
+            metrics = q3()
+            logger.info("metrics = ${metrics}")
+            assertEquals("100", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("0", metrics["RawRowsRead"])
+            assertEquals("100", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("100", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("0", metrics["RowGroupsReadNum"])
+            assertEquals("100", metrics["RowGroupsTotalNum"])
+
+            metrics = q4()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("7.279K (7279)", metrics["FilteredRowsByPage"])
+            assertEquals("21", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+
+            metrics = q5()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("7.258K (7258)", metrics["FilteredRowsByPage"])
+            assertEquals("42", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+
+            metrics = q6()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertTrue(metrics["RawRowsRead"].contains("7300"))
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+
+            metrics = q7()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("7.279K (7279)", metrics["FilteredRowsByPage"])
+            assertEquals("21", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+        }
+
+
+        def test_false_false = {
+            sql """ set enable_parquet_filter_by_min_max = false; """
+            sql """ set enable_parquet_lazy_materialization = false; """
+
+            def metrics = q1()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("100", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("100", metrics["RowGroupsReadNum"])
+            assertEquals("100", metrics["RowGroupsTotalNum"])
+
+            metrics = q2()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("100", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("100", metrics["RowGroupsReadNum"])
+            assertEquals("100", metrics["RowGroupsTotalNum"])
+
+            metrics = q3()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("100", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("100", metrics["RowGroupsReadNum"])
+            assertEquals("100", metrics["RowGroupsTotalNum"])
+
+            metrics = q4()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("7.3K (7300)", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+
+            metrics = q5()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("7.3K (7300)", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+
+            metrics = q6()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("7.3K (7300)", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+
+            metrics = q7()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("0", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("7.3K (7300)", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+        }
+
+
+        def test_false_true = {
+            sql """ set enable_parquet_filter_by_min_max = false; """
+            sql """ set enable_parquet_lazy_materialization = true; """
+
+            def metrics = q1()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("99", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("100", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("100", metrics["RowGroupsReadNum"])
+            assertEquals("100", metrics["RowGroupsTotalNum"])
+
+            metrics = q2()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("99", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("100", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("100", metrics["RowGroupsReadNum"])
+            assertEquals("100", metrics["RowGroupsTotalNum"])
+
+            metrics = q3()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertEquals("100", metrics["FilteredRowsByLazyRead"])
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertEquals("100", metrics["RawRowsRead"])
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("100", metrics["RowGroupsReadNum"])
+            assertEquals("100", metrics["RowGroupsTotalNum"])
+
+            metrics = q4()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertTrue(metrics["FilteredRowsByLazyRead"].contains("7299"))
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertTrue(metrics["RawRowsRead"].contains("7300"))
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+
+            metrics = q5()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertTrue(metrics["FilteredRowsByLazyRead"].contains("7286"))
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertTrue(metrics["RawRowsRead"].contains("7300"))
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+
+            metrics = q6()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertTrue(metrics["FilteredRowsByLazyRead"].contains("1"))
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertTrue(metrics["RawRowsRead"].contains("7300"))
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+
+            metrics = q7()
+            logger.info("metrics = ${metrics}")
+            assertEquals("0", metrics["FilteredRowsByGroup"])
+            assertTrue(metrics["FilteredRowsByLazyRead"].contains("7298"))
+            assertEquals("0", metrics["FilteredRowsByPage"])
+            assertTrue(metrics["RawRowsRead"].contains("7300"))
+            assertEquals("0", metrics["RowGroupsFiltered"])
+            assertEquals("0", metrics["RowGroupsFilteredByBloomFilter"])
+            assertEquals("0", metrics["RowGroupsFilteredByMinMax"])
+            assertEquals("1", metrics["RowGroupsReadNum"])
+            assertEquals("1", metrics["RowGroupsTotalNum"])
+        }
+
+        test_true_true();
+        test_true_false();
+        test_false_false();
+        test_false_true();
+
+
+        sql """drop catalog ${catalog_name};"""
+    }
+
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to