This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new de03c152cec [fix](thrift)cancel thrift msg max size limit (#25194)
de03c152cec is described below

commit de03c152cec0403cf5f6c4ee2e94bb2ff7315346
Author: slothever <18522955+w...@users.noreply.github.com>
AuthorDate: Fri Oct 13 20:21:36 2023 +0800

    [fix](thrift)cancel thrift msg max size limit (#25194)
    
    On Thrift 0.14.0+, TConfiguration must be used to raise the max message size.
    see https://github.com/apache/arrow/pull/11123/files
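    
    For context, a minimal self-contained sketch of the idea behind this patch
    (assuming Thrift 0.14+ headers; the helper name make_unlimited_transport is
    illustrative only, not part of Doris):
    
        #include <cstdint>
        #include <limits>
        #include <memory>
        #include <thrift/TConfiguration.h>
        #include <thrift/transport/TBufferTransports.h>
        
        // On Thrift 0.14.0+ every transport consults a TConfiguration whose
        // max message size defaults to 100MB; passing a configuration with a
        // larger limit lifts that cap for the wrapped buffer.
        std::shared_ptr<apache::thrift::transport::TMemoryBuffer>
        make_unlimited_transport(uint8_t* buf, uint32_t len) {
            auto conf = std::make_shared<apache::thrift::TConfiguration>();
            conf->setMaxMessageSize(std::numeric_limits<int>::max());
            // OBSERVE: the buffer is borrowed, not owned, by the transport.
            return std::make_shared<apache::thrift::transport::TMemoryBuffer>(
                    buf, len, apache::thrift::transport::TMemoryBuffer::OBSERVE,
                    conf);
        }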
---
 be/src/util/thrift_util.h                          |   8 +-
 .../ddl/parquet_large_metadata_100mb_create.sql    | 258 +++++++++++++++++++++
 .../ddl/parquet_large_metadata_100mb_drop.sql      |   1 +
 .../test_parquet_large_metadata_load.groovy        | 120 ++++++++++
 4 files changed, 386 insertions(+), 1 deletion(-)

diff --git a/be/src/util/thrift_util.h b/be/src/util/thrift_util.h
index 6b40dd6906f..b16482df610 100644
--- a/be/src/util/thrift_util.h
+++ b/be/src/util/thrift_util.h
@@ -135,8 +135,14 @@ Status deserialize_thrift_msg(const uint8_t* buf, uint32_t* len, bool compact,
     // Deserialize msg bytes into c++ thrift msg using memory
     // transport. TMemoryBuffer is not const-safe, although we use it in
     // a const-safe way, so we have to explicitly cast away the const.
+    auto conf = std::make_shared<apache::thrift::TConfiguration>();
+    // On Thrift 0.14.0+, TConfiguration must be used to raise the max message size.
+    // The default max message size is 100MB, so make it effectively unlimited.
+    conf->setMaxMessageSize(std::numeric_limits<int>::max());
     std::shared_ptr<apache::thrift::transport::TMemoryBuffer> tmem_transport(
-            new apache::thrift::transport::TMemoryBuffer(const_cast<uint8_t*>(buf), *len));
+            new apache::thrift::transport::TMemoryBuffer(
+                    const_cast<uint8_t*>(buf), *len,
+                    apache::thrift::transport::TMemoryBuffer::OBSERVE, conf));
     std::shared_ptr<apache::thrift::protocol::TProtocol> tproto =
             create_deserialize_protocol(tmem_transport, compact);
 
diff --git a/regression-test/suites/load_p2/broker_load/ddl/parquet_large_metadata_100mb_create.sql b/regression-test/suites/load_p2/broker_load/ddl/parquet_large_metadata_100mb_create.sql
new file mode 100644
index 00000000000..c22d0a7db10
--- /dev/null
+++ b/regression-test/suites/load_p2/broker_load/ddl/parquet_large_metadata_100mb_create.sql
@@ -0,0 +1,258 @@
+    CREATE TABLE IF NOT EXISTS parquet_large_metadata_100mb
+    (
+        `0`  BIGINT,
+        `1`  BIGINT,
+        `2`  BIGINT,
+        `3`  BIGINT,
+        `4`  BIGINT,
+        `5`  BIGINT,
+        `6`  BIGINT,
+        `7`  BIGINT,
+        `8`  BIGINT,
+        `9`  BIGINT,
+        `10`  BIGINT,
+        `11`  BIGINT,
+        `12`  BIGINT,
+        `13`  BIGINT,
+        `14`  BIGINT,
+        `15`  BIGINT,
+        `16`  BIGINT,
+        `17`  BIGINT,
+        `18`  BIGINT,
+        `19`  BIGINT,
+        `20`  BIGINT,
+        `21`  BIGINT,
+        `22`  BIGINT,
+        `23`  BIGINT,
+        `24`  BIGINT,
+        `25`  BIGINT,
+        `26`  BIGINT,
+        `27`  BIGINT,
+        `28`  BIGINT,
+        `29`  BIGINT,
+        `30`  BIGINT,
+        `31`  BIGINT,
+        `32`  BIGINT,
+        `33`  BIGINT,
+        `34`  BIGINT,
+        `35`  BIGINT,
+        `36`  BIGINT,
+        `37`  BIGINT,
+        `38`  BIGINT,
+        `39`  BIGINT,
+        `40`  BIGINT,
+        `41`  BIGINT,
+        `42`  BIGINT,
+        `43`  BIGINT,
+        `44`  BIGINT,
+        `45`  BIGINT,
+        `46`  BIGINT,
+        `47`  BIGINT,
+        `48`  BIGINT,
+        `49`  BIGINT,
+        `50`  BIGINT,
+        `51`  BIGINT,
+        `52`  BIGINT,
+        `53`  BIGINT,
+        `54`  BIGINT,
+        `55`  BIGINT,
+        `56`  BIGINT,
+        `57`  BIGINT,
+        `58`  BIGINT,
+        `59`  BIGINT,
+        `60`  BIGINT,
+        `61`  BIGINT,
+        `62`  BIGINT,
+        `63`  BIGINT,
+        `64`  BIGINT,
+        `65`  BIGINT,
+        `66`  BIGINT,
+        `67`  BIGINT,
+        `68`  BIGINT,
+        `69`  BIGINT,
+        `70`  BIGINT,
+        `71`  BIGINT,
+        `72`  BIGINT,
+        `73`  BIGINT,
+        `74`  BIGINT,
+        `75`  BIGINT,
+        `76`  BIGINT,
+        `77`  BIGINT,
+        `78`  BIGINT,
+        `79`  BIGINT,
+        `80`  BIGINT,
+        `81`  BIGINT,
+        `82`  BIGINT,
+        `83`  BIGINT,
+        `84`  BIGINT,
+        `85`  BIGINT,
+        `86`  BIGINT,
+        `87`  BIGINT,
+        `88`  BIGINT,
+        `89`  BIGINT,
+        `90`  BIGINT,
+        `91`  BIGINT,
+        `92`  BIGINT,
+        `93`  BIGINT,
+        `94`  BIGINT,
+        `95`  BIGINT,
+        `96`  BIGINT,
+        `97`  BIGINT,
+        `98`  BIGINT,
+        `99`  BIGINT,
+        `100`  BIGINT,
+        `101`  BIGINT,
+        `102`  BIGINT,
+        `103`  BIGINT,
+        `104`  BIGINT,
+        `105`  BIGINT,
+        `106`  BIGINT,
+        `107`  BIGINT,
+        `108`  BIGINT,
+        `109`  BIGINT,
+        `110`  BIGINT,
+        `111`  BIGINT,
+        `112`  BIGINT,
+        `113`  BIGINT,
+        `114`  BIGINT,
+        `115`  BIGINT,
+        `116`  BIGINT,
+        `117`  BIGINT,
+        `118`  BIGINT,
+        `119`  BIGINT,
+        `120`  BIGINT,
+        `121`  BIGINT,
+        `122`  BIGINT,
+        `123`  BIGINT,
+        `124`  BIGINT,
+        `125`  BIGINT,
+        `126`  BIGINT,
+        `127`  BIGINT,
+        `128`  BIGINT,
+        `129`  BIGINT,
+        `130`  BIGINT,
+        `131`  BIGINT,
+        `132`  BIGINT,
+        `133`  BIGINT,
+        `134`  BIGINT,
+        `135`  BIGINT,
+        `136`  BIGINT,
+        `137`  BIGINT,
+        `138`  BIGINT,
+        `139`  BIGINT,
+        `140`  BIGINT,
+        `141`  BIGINT,
+        `142`  BIGINT,
+        `143`  BIGINT,
+        `144`  BIGINT,
+        `145`  BIGINT,
+        `146`  BIGINT,
+        `147`  BIGINT,
+        `148`  BIGINT,
+        `149`  BIGINT,
+        `150`  BIGINT,
+        `151`  BIGINT,
+        `152`  BIGINT,
+        `153`  BIGINT,
+        `154`  BIGINT,
+        `155`  BIGINT,
+        `156`  BIGINT,
+        `157`  BIGINT,
+        `158`  BIGINT,
+        `159`  BIGINT,
+        `160`  BIGINT,
+        `161`  BIGINT,
+        `162`  BIGINT,
+        `163`  BIGINT,
+        `164`  BIGINT,
+        `165`  BIGINT,
+        `166`  BIGINT,
+        `167`  BIGINT,
+        `168`  BIGINT,
+        `169`  BIGINT,
+        `170`  BIGINT,
+        `171`  BIGINT,
+        `172`  BIGINT,
+        `173`  BIGINT,
+        `174`  BIGINT,
+        `175`  BIGINT,
+        `176`  BIGINT,
+        `177`  BIGINT,
+        `178`  BIGINT,
+        `179`  BIGINT,
+        `180`  BIGINT,
+        `181`  BIGINT,
+        `182`  BIGINT,
+        `183`  BIGINT,
+        `184`  BIGINT,
+        `185`  BIGINT,
+        `186`  BIGINT,
+        `187`  BIGINT,
+        `188`  BIGINT,
+        `189`  BIGINT,
+        `190`  BIGINT,
+        `191`  BIGINT,
+        `192`  BIGINT,
+        `193`  BIGINT,
+        `194`  BIGINT,
+        `195`  BIGINT,
+        `196`  BIGINT,
+        `197`  BIGINT,
+        `198`  BIGINT,
+        `199`  BIGINT,
+        `200`  BIGINT,
+        `201`  BIGINT,
+        `202`  BIGINT,
+        `203`  BIGINT,
+        `204`  BIGINT,
+        `205`  BIGINT,
+        `206`  BIGINT,
+        `207`  BIGINT,
+        `208`  BIGINT,
+        `209`  BIGINT,
+        `210`  BIGINT,
+        `211`  BIGINT,
+        `212`  BIGINT,
+        `213`  BIGINT,
+        `214`  BIGINT,
+        `215`  BIGINT,
+        `216`  BIGINT,
+        `217`  BIGINT,
+        `218`  BIGINT,
+        `219`  BIGINT,
+        `220`  BIGINT,
+        `221`  BIGINT,
+        `222`  BIGINT,
+        `223`  BIGINT,
+        `224`  BIGINT,
+        `225`  BIGINT,
+        `226`  BIGINT,
+        `227`  BIGINT,
+        `228`  BIGINT,
+        `229`  BIGINT,
+        `230`  BIGINT,
+        `231`  BIGINT,
+        `232`  BIGINT,
+        `233`  BIGINT,
+        `234`  BIGINT,
+        `235`  BIGINT,
+        `236`  BIGINT,
+        `237`  BIGINT,
+        `238`  BIGINT,
+        `239`  BIGINT,
+        `240`  BIGINT,
+        `241`  BIGINT,
+        `242`  BIGINT,
+        `243`  BIGINT,
+        `244`  BIGINT,
+        `245`  BIGINT,
+        `246`  BIGINT,
+        `247`  BIGINT,
+        `248`  BIGINT,
+        `249`  BIGINT
+    )
+    DISTRIBUTED BY HASH(`1`, `2`)
+    PROPERTIES
+    (
+        "replication_allocation" = "tag.location.default: 1"
+    );
diff --git a/regression-test/suites/load_p2/broker_load/ddl/parquet_large_metadata_100mb_drop.sql b/regression-test/suites/load_p2/broker_load/ddl/parquet_large_metadata_100mb_drop.sql
new file mode 100644
index 00000000000..b4b576344c0
--- /dev/null
+++ b/regression-test/suites/load_p2/broker_load/ddl/parquet_large_metadata_100mb_drop.sql
@@ -0,0 +1 @@
+DROP TABLE IF EXISTS parquet_large_metadata_100mb;
diff --git a/regression-test/suites/load_p2/broker_load/test_parquet_large_metadata_load.groovy b/regression-test/suites/load_p2/broker_load/test_parquet_large_metadata_load.groovy
new file mode 100644
index 00000000000..70e0681d0cb
--- /dev/null
+++ b/regression-test/suites/load_p2/broker_load/test_parquet_large_metadata_load.groovy
@@ -0,0 +1,120 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_parquet_large_metadata_load_p2", "p2") {
+
+    def tables = ["parquet_large_metadata_100mb" // metadata size is more than 100MB
+    ]
+    def paths = ["s3://doris-build-1308700295/regression/load/metadata/parquet_large_metadata_100mb.parquet"
+    ]
+    String ak = getS3AK()
+    String sk = getS3SK()
+    String enabled = context.config.otherConfigs.get("enableBrokerLoad")
+
+    def expect_tvf_result = """[[2, 8], [2, 8], [2, 8], [2, 8], [2, 8]]"""
+    String[][] tvf_result = sql """select `1`,`2` from s3(
+                                     "uri" = 
"https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/load/metadata/parquet_large_metadata_100mb.parquet";,
+                                     "s3.access_key" = "$ak",
+                                     "s3.secret_key" = "$sk",
+                                     "s3.region" = "ap-beijing",
+                                     "format" = "parquet"
+                                   ) order by `1`,`2` limit 5;
+                                """
+    assertTrue("$tvf_result" == "$expect_tvf_result")
+            
+    def do_load_job = { uuid, path, table ->
+        sql """
+            LOAD LABEL $uuid (
+                APPEND
+                DATA INFILE("$path")
+                INTO TABLE $table
+                FORMAT AS "PARQUET"
+            )
+            WITH S3 (
+                "AWS_ACCESS_KEY" = "$ak",
+                "AWS_SECRET_KEY" = "$sk",
+                "AWS_ENDPOINT" = "cos.ap-beijing.myqcloud.com",
+                "AWS_REGION" = "ap-beijing"
+            )
+            PROPERTIES
+            (
+                "strict_mode"="true"
+            ); 
+            """
+        logger.info("Submit load with lable: $uuid, table: $table, path: 
$path")
+    }
+    
+    def etl_info = ["unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=45000"]
+    def task_info = ["cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0"]
+    def error_msg = [""]
+    // test unified load
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        def uuids = []
+        try {
+            def i = 0
+
+            for (String table in tables) {
+                sql new File("""${context.file.parent}/ddl/${table}_drop.sql""").text
+                sql new File("""${context.file.parent}/ddl/${table}_create.sql""").text
+
+                def uuid = UUID.randomUUID().toString().replace("-", "0")
+                uuids.add(uuid)
+                do_load_job.call(uuid, paths[i], table)
+                i++
+            }
+
+            i = 0
+            for (String label in uuids) {
+                def max_try_milli_secs = 600000
+                while (max_try_milli_secs > 0) {
+                    String[][] result = sql """ show load where label="$label" order by createtime desc limit 1; """
+                    if (result[0][2].equals("FINISHED")) {
+                        logger.info("Load FINISHED " + label)
+                        assertTrue(result[0][6].contains(task_info[i]))
+                        assertTrue(etl_info[i] == result[0][5], "expected: " + etl_info[i] + ", actual: " + result[0][5] + ", label: $label")
+                        break;
+                    }
+                    if (result[0][2].equals("CANCELLED")) {
+                        assertTrue(result[0][6].contains(task_info[i]))
+                        assertTrue(result[0][7].contains(error_msg[i]))
+                        break;
+                    }
+                    Thread.sleep(1000)
+                    max_try_milli_secs -= 1000
+                    if (max_try_milli_secs <= 0) {
+                        assertTrue(false, "load Timeout: $label")
+                    }
+                }
+                i++
+            }
+                        
+            def expect_result = """[[45000]]"""
+
+            for (String table in tables) {
+                if (table.matches("parquet_large_metadata_100mb")) {
+                    String[][] actual_result = sql """select count(*) from parquet_large_metadata_100mb;"""
+                    assertTrue("$actual_result" == "$expect_result")
+                }
+            }
+
+        } finally {
+            for (String table in tables) {
+                sql new File("""${context.file.parent}/ddl/${table}_drop.sql""").text
+            }
+        }
+    }
+}

