This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 189801c2eea [Feature](Variant) support export csv format (#37857)
189801c2eea is described below

commit 189801c2eea5868ff5e3fdf799eb03f4b1811baa
Author: lihangyu <[email protected]>
AuthorDate: Tue Jul 16 16:44:28 2024 +0800

    [Feature](Variant) support export csv format (#37857)
---
 .../data_types/serde/data_type_object_serde.cpp    |  10 ++
 .../vec/data_types/serde/data_type_object_serde.h  |   4 +-
 .../export_p0/test_outfile_csv_variant_type.out    |  41 ++++++
 .../export_p0/test_outfile_csv_variant_type.groovy | 141 +++++++++++++++++++++
 4 files changed, 193 insertions(+), 3 deletions(-)
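
The core change implements serialize_one_cell_to_json for DataTypeObjectSerDe, which previously returned NotSupported, so VARIANT (ColumnObject) values can be rendered as text when exporting with FORMAT AS CSV. Below is a minimal sketch, not part of this patch, of how a per-row text writer might call the new override; the serde signature comes from this commit, while the include paths, the FormatOptions spelling, and the surrounding loop are assumptions for illustration only.

    // Hypothetical caller sketch -- illustrates the contract of the new override.
    #include "common/status.h"                               // Status, RETURN_IF_ERROR (assumed path)
    #include "vec/data_types/serde/data_type_object_serde.h" // DataTypeObjectSerDe (from this patch)

    namespace doris::vectorized {

    // Render every row of a VARIANT column into the writer as JSON-like text,
    // e.g. {"a":123} or [9,99,999], the same form checked by the new regression test.
    Status write_variant_column_as_text(const IColumn& column, BufferWritable& bw) {
        DataTypeObjectSerDe serde;            // default nesting_level = 1
        DataTypeSerDe::FormatOptions options; // assumed nested-struct spelling
        for (size_t row = 0; row < column.size(); ++row) {
            // Fails with InternalError if the ColumnObject cannot be serialized.
            RETURN_IF_ERROR(serde.serialize_one_cell_to_json(column, static_cast<int>(row), bw, options));
        }
        return Status::OK();
    }

    } // namespace doris::vectorized

Returning Status instead of throwing keeps the CSV export path consistent with the other serde overrides; the InternalError carries var->dump_structure() so a failing variant column can be diagnosed from the export error message.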

diff --git a/be/src/vec/data_types/serde/data_type_object_serde.cpp b/be/src/vec/data_types/serde/data_type_object_serde.cpp
index e9015db653a..c19a5f18595 100644
--- a/be/src/vec/data_types/serde/data_type_object_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_object_serde.cpp
@@ -114,6 +114,16 @@ void DataTypeObjectSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbV
     variant.insert(field);
 }
 
+Status DataTypeObjectSerDe::serialize_one_cell_to_json(const IColumn& column, int row_num,
+                                                       BufferWritable& bw,
+                                                       FormatOptions& options) const {
+    const auto* var = check_and_get_column<ColumnObject>(column);
+    if (!var->serialize_one_row_to_string(row_num, bw)) {
+        return Status::InternalError("Failed to serialize variant {}", var->dump_structure());
+    }
+    return Status::OK();
+}
+
 void DataTypeObjectSerDe::write_column_to_arrow(const IColumn& column, const NullMap* null_map,
                                                 arrow::ArrayBuilder* array_builder, int start,
                                                 int end, const cctz::time_zone& ctz) const {
diff --git a/be/src/vec/data_types/serde/data_type_object_serde.h b/be/src/vec/data_types/serde/data_type_object_serde.h
index 66178f0ecb3..9351b200f53 100644
--- a/be/src/vec/data_types/serde/data_type_object_serde.h
+++ b/be/src/vec/data_types/serde/data_type_object_serde.h
@@ -39,9 +39,7 @@ public:
     DataTypeObjectSerDe(int nesting_level = 1) : DataTypeSerDe(nesting_level) {};
 
     Status serialize_one_cell_to_json(const IColumn& column, int row_num, BufferWritable& bw,
-                                      FormatOptions& options) const override {
-        return Status::NotSupported("serialize_one_cell_to_json with type [{}]", column.get_name());
-    }
+                                      FormatOptions& options) const override;
 
     Status serialize_column_to_json(const IColumn& column, int start_idx, int end_idx,
                                     BufferWritable& bw, FormatOptions& options) const override {
diff --git a/regression-test/data/export_p0/test_outfile_csv_variant_type.out b/regression-test/data/export_p0/test_outfile_csv_variant_type.out
new file mode 100644
index 00000000000..8016918c79b
--- /dev/null
+++ b/regression-test/data/export_p0/test_outfile_csv_variant_type.out
@@ -0,0 +1,41 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_base1 --
+1      doris1  [9,99,999]
+2      doris2  [8,88]
+3      doris3  {"a":123}
+4      doris4  \N
+5      doris5  [1,null,2]
+6      doris6  {"aaaa":"111111"}
+7      doris7  {"bbbb":1.1111}
+8      doris8  {"xxx":[111.11]}
+
+-- !select_load1 --
+1      doris1  [9,99,999]
+2      doris2  [8,88]
+3      doris3  {"a":123}
+4      doris4  \N
+5      doris5  [1,null,2]
+6      doris6  {"aaaa":"111111"}
+7      doris7  {"bbbb":1.1111}
+8      doris8  {"xxx":[111.11]}
+
+-- !select_base2 --
+1      doris1  [9,99,999]
+2      doris2  [8,88]
+3      doris3  {"a":123}
+4      doris4  {}
+5      doris5  [1,null,2]
+6      doris6  {"aaaa":"111111"}
+7      doris7  {"bbbb":1.1111}
+8      doris8  {"xxx":[111.11]}
+
+-- !select_load2 --
+1      doris1  [9,99,999]
+2      doris2  [8,88]
+3      doris3  {"a":123}
+4      doris4  {}
+5      doris5  [1,null,2]
+6      doris6  {"aaaa":"111111"}
+7      doris7  {"bbbb":1.1111}
+8      doris8  {"xxx":[111.11]}
+
diff --git a/regression-test/suites/export_p0/test_outfile_csv_variant_type.groovy b/regression-test/suites/export_p0/test_outfile_csv_variant_type.groovy
new file mode 100644
index 00000000000..9ea95a287b7
--- /dev/null
+++ b/regression-test/suites/export_p0/test_outfile_csv_variant_type.groovy
@@ -0,0 +1,141 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+suite("test_outfile_csv_variant_type", "p0") {
+    // open nereids
+    sql """ set enable_nereids_planner=true """
+    sql """ set enable_fallback_to_original_planner=false """
+
+    String ak = getS3AK()
+    String sk = getS3SK()
+    String s3_endpoint = getS3Endpoint()
+    String region = getS3Region()
+    String bucket = context.config.otherConfigs.get("s3BucketName");
+
+
+    def export_table_name = "outfile_csv_variant_export_test"
+    def load_table_name = "outfile_csv_variant_type_load_test"
+    def outFilePath = "${bucket}/outfile/csv/variant_type/exp_"
+
+
+    def create_table = {table_name, struct_field ->
+        sql """ DROP TABLE IF EXISTS ${table_name} """
+        sql """
+        CREATE TABLE IF NOT EXISTS ${table_name} (
+            `user_id` LARGEINT NOT NULL COMMENT "user id",
+            `name` STRING COMMENT "user age",
+            ${struct_field}
+            )
+            DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
+        """
+    }
+
+    def outfile_to_S3 = {
+        // select ... into outfile ...
+        def res = sql """
+            SELECT * FROM ${export_table_name} t ORDER BY user_id
+            INTO OUTFILE "s3://${outFilePath}"
+            FORMAT AS CSV
+            PROPERTIES (
+                "s3.endpoint" = "${s3_endpoint}",
+                "s3.region" = "${region}",
+                "s3.secret_key"="${sk}",
+                "s3.access_key" = "${ak}"
+            );
+        """
+
+        return res[0][3]
+    }
+
+
+    // 1. test NULL variant 
+    try {
+        def struct_field_define = "`a_info` VARIANT NULL"
+        // create table to export data
+        create_table(export_table_name, struct_field_define)
+        // create table to load data
+        create_table(load_table_name, struct_field_define)
+
+
+        // insert data
+        sql """ insert into ${export_table_name} values (1, 'doris1', '[9, 99, 
999]'), (2, 'doris2', '[8, 88]'); """
+        sql """ insert into ${export_table_name} values (3, 'doris3', '{"a" : 
123}'); """
+        sql """ insert into ${export_table_name} values (4, 'doris4', null); 
"""
+        sql """ insert into ${export_table_name} values (5, 'doris5', '[1, 
null, 2]'); """
+        sql """ insert into ${export_table_name} values (6, 'doris6', '{"aaaa" 
: "111111"}'); """
+        sql """ insert into ${export_table_name} values (7, 'doris7', '{"bbbb" 
: 1.1111}'); """
+        sql """ insert into ${export_table_name} values (8, 'doris8', '{"xxx" 
: [111.11]}'); """
+
+
+        // test base data
+        qt_select_base1 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
+
+        def outfile_url = outfile_to_S3()
+
+        qt_select_load1 """ SELECT * FROM S3 (
+                "uri" = 
"http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), 
outfile_url.length() - 1)}0.csv",
+                "ACCESS_KEY"= "${ak}",
+                "SECRET_KEY" = "${sk}",
+                "format" = "csv",
+                "region" = "${region}"
+            );
+            """
+    } finally {
+    }
+
+
+    // 2. test NOT NULL VARIANT
+    try {
+        def struct_field_define = "`a_info` VARIANT NOT NULL"
+        // create table to export data
+        create_table(export_table_name, struct_field_define)
+        // create table to load data
+        create_table(load_table_name, struct_field_define)
+
+
+        // insert data
+        sql """ insert into ${export_table_name} values (1, 'doris1', '[9, 99, 999]'), (2, 'doris2', '[8, 88]'); """
+        sql """ insert into ${export_table_name} values (3, 'doris3', '{"a" : 123}'); """
+        sql """ insert into ${export_table_name} values (4, 'doris4', '{}'); """
+        sql """ insert into ${export_table_name} values (5, 'doris5', '[1, null, 2]'); """
+        sql """ insert into ${export_table_name} values (6, 'doris6', '{"aaaa" : "111111"}'); """
+        sql """ insert into ${export_table_name} values (7, 'doris7', '{"bbbb" : 1.1111}'); """
+        sql """ insert into ${export_table_name} values (8, 'doris8', '{"xxx" : [111.11]}'); """
+
+        // test base data
+        qt_select_base2 """ SELECT * FROM ${export_table_name} t ORDER BY user_id; """
+
+        def outfile_url = outfile_to_S3()
+
+        qt_select_load2 """ SELECT * FROM S3 (
+                "uri" = 
"http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), 
outfile_url.length() - 1)}0.csv",
+                "ACCESS_KEY"= "${ak}",
+                "SECRET_KEY" = "${sk}",
+                "format" = "csv",
+                "region" = "${region}"
+            );
+            """
+    } finally {
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
