This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 47f49ea16a6 [Fix](orc-reader-merge-io) Clear streams map when all row 
groups are filtered by row group stats, despite stripe stats remaining 
unfiltered. (#50185)
47f49ea16a6 is described below

commit 47f49ea16a6b0789cada0b64e32163e990e8fdaf
Author: Qi Chen <[email protected]>
AuthorDate: Thu Apr 24 04:42:04 2025 +0800

    [Fix](orc-reader-merge-io) Clear streams map when all row groups are 
filtered by row group stats, despite stripe stats remaining unfiltered. (#50185)
    
    ### What problem does this PR solve?
    
    Related PR: https://github.com/apache/doris-thirdparty/pull/306
    
    Problem Summary:
    When all row groups are filtered out by row group stats while the
    stripe itself is not filtered out by stripe stats, the streams map is
    not cleared, which causes incorrect data to be read and the query to fail:
    
    ```
    ERROR 1105 (HY000): errCode = 2, detailMessage = 
(172.20.32.136)[INTERNAL_ERROR]Orc row reader nextBatch failed. reason = Buffer 
error in ZlibDecompressionStream::NextDecompress.
    ```
---
 be/src/apache-orc                                  |   2 +-
 .../hive/test_orc_merge_io_input_streams.out       | Bin 0 -> 123 bytes
 .../hive/test_orc_merge_io_input_streams.groovy    |  52 +++++++++++++++++++++
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/be/src/apache-orc b/be/src/apache-orc
index c9891b33063..18fb8e2c288 160000
--- a/be/src/apache-orc
+++ b/be/src/apache-orc
@@ -1 +1 @@
-Subproject commit c9891b33063b32c72f3db6c61b6b59ba65444aee
+Subproject commit 18fb8e2c2888a3518bf2bbd905f60772f4754739
diff --git 
a/regression-test/data/external_table_p2/hive/test_orc_merge_io_input_streams.out
 
b/regression-test/data/external_table_p2/hive/test_orc_merge_io_input_streams.out
new file mode 100644
index 00000000000..fa812f2afc2
Binary files /dev/null and 
b/regression-test/data/external_table_p2/hive/test_orc_merge_io_input_streams.out
 differ
diff --git 
a/regression-test/suites/external_table_p2/hive/test_orc_merge_io_input_streams.groovy
 
b/regression-test/suites/external_table_p2/hive/test_orc_merge_io_input_streams.groovy
new file mode 100644
index 00000000000..8406bf0b423
--- /dev/null
+++ 
b/regression-test/suites/external_table_p2/hive/test_orc_merge_io_input_streams.groovy
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_orc_merge_io_input_streams", 
"p2,external,hive,external_remote,external_remote_hive") {
+
+    String enabled = context.config.otherConfigs.get("enableExternalHudiTest")
+    //hudi hive use same catalog in p2.
+    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+        logger.info("disable test")
+        return;
+    }
+
+    String props = context.config.otherConfigs.get("hudiEmrCatalog")
+    String hms_catalog_name = "test_orc_merge_io_input_streams"
+
+    sql """drop catalog if exists ${hms_catalog_name};"""
+    sql """
+        CREATE CATALOG IF NOT EXISTS ${hms_catalog_name}
+        PROPERTIES (
+            ${props}
+            ,'hive.version' = '3.1.3'
+        );
+    """
+
+    logger.info("catalog " + hms_catalog_name + " created")
+    sql """switch ${hms_catalog_name};"""
+    logger.info("switched to catalog " + hms_catalog_name)
+    sql """ use regression;"""
+
+    sql """ set dry_run_query=true; """
+
+    qt_1 """  SELECT trace_id as trace_id_sub,created_time FROM 
test_orc_merge_io_input_streams_table
+              where dt = replace(date_sub('2025-04-16', 1), '-', '') and 
trace_id='1210647803'; """
+    qt_2 """ select * from test_orc_merge_io_input_streams_table ; """
+
+    sql """drop catalog ${hms_catalog_name};"""
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to