This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 97d36b4f38c [fix](csv_reader) fix trim_double_quotes behavior change (#27882)
97d36b4f38c is described below

commit 97d36b4f38cccda9796bad9bc2dcfa328ef20271
Author: HHoflittlefish777 <[email protected]>
AuthorDate: Sun Dec 3 22:57:55 2023 +0800

    [fix](csv_reader) fix trim_double_quotes behavior change (#27882)
---
 be/src/util/slice.h                                | 20 ++++++++
 be/src/vec/exec/format/csv/csv_reader.cpp          |  2 +-
 .../stream_load/test_csv_with_double_quotes.out    |  2 +
 .../test_double_quotes_with_enclose.csv            |  1 +
 .../data/load_p0/stream_load/test_single_quote.csv |  1 +
 .../stream_load/test_csv_with_double_quotes.groovy | 54 ++++++++++++++++++++++
 6 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/be/src/util/slice.h b/be/src/util/slice.h
index dce933e5580..ed119b8051f 100644
--- a/be/src/util/slice.h
+++ b/be/src/util/slice.h
@@ -176,6 +176,26 @@ public:
         }
         return change;
     }
+
+    /// Strip one enclosing pair of double quotes ('"') from the slice.
+    ///
+    /// The quotes are removed only when size > 2 and both the first and
+    /// the last byte are '"'; otherwise the slice is left untouched.
+    ///
+    /// @note Only the base and bounds of the slice are changed;
+    ///   the data is not modified.
+    ///
+    /// @return true if the quotes were stripped, false otherwise.
+    bool trim_double_quotes() {
+        int32_t begin = 0;
+        if (size > 2 && (data[begin] == '"' && data[size - 1] == '"')) {
+            data += 1;
+            size -= 2;
+            return true;
+        }
+        return false;
+    }
+
     /// Truncate the slice to the given number of bytes.
     ///
     /// @pre n <= size
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp
index 94407941673..fced6bdd490 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -619,7 +619,7 @@ Status CsvReader::_create_decompressor() {
 template <bool from_json>
 Status CsvReader::deserialize_nullable_string(IColumn& column, Slice& slice) {
     auto& null_column = assert_cast<ColumnNullable&>(column);
-    if (!(from_json && _options.converted_from_string && slice.trim_quote())) {
+    if (!(from_json && _options.converted_from_string && slice.trim_double_quotes())) {
         if (slice.size == 2 && slice[0] == '\\' && slice[1] == 'N') {
             null_column.insert_data(nullptr, 0);
             return Status::OK();
diff --git a/regression-test/data/load_p0/stream_load/test_csv_with_double_quotes.out b/regression-test/data/load_p0/stream_load/test_csv_with_double_quotes.out
index 0ae5ebe7f7a..13a0dbaee9a 100644
--- a/regression-test/data/load_p0/stream_load/test_csv_with_double_quotes.out
+++ b/regression-test/data/load_p0/stream_load/test_csv_with_double_quotes.out
@@ -19,3 +19,5 @@
 7      8       3       abc     2022-12-01      2022-12-01T09:30:31
 8      9       3       abc     2022-12-01      2022-12-01T09:30:31
 
+-- !sql_test_single_quote --
+'a'    21      'b'
diff --git a/regression-test/data/load_p0/stream_load/test_double_quotes_with_enclose.csv b/regression-test/data/load_p0/stream_load/test_double_quotes_with_enclose.csv
new file mode 100644
index 00000000000..d77096361a1
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_double_quotes_with_enclose.csv
@@ -0,0 +1 @@
+?"a"?,21,?"b"?
\ No newline at end of file
diff --git a/regression-test/data/load_p0/stream_load/test_single_quote.csv 
b/regression-test/data/load_p0/stream_load/test_single_quote.csv
new file mode 100644
index 00000000000..621abae313f
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_single_quote.csv
@@ -0,0 +1 @@
+'a',21,'b'
\ No newline at end of file
diff --git a/regression-test/suites/load_p0/stream_load/test_csv_with_double_quotes.groovy b/regression-test/suites/load_p0/stream_load/test_csv_with_double_quotes.groovy
index 1743d28d117..03a31f7997e 100644
--- a/regression-test/suites/load_p0/stream_load/test_csv_with_double_quotes.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_csv_with_double_quotes.groovy
@@ -59,4 +59,58 @@ suite("test_csv_with_double_quotes", "p0") {
     sql "sync"
     qt_sql "select * from ${tableName} order by k1, k2"
     sql """ DROP TABLE IF EXISTS ${tableName} """
+
+    def create_table = { testTablex ->
+                    sql """
+                        CREATE TABLE `${testTablex}` (
+                            `name` varchar(48) NULL,
+                            `age` bigint(20) NULL,
+                            `agent_id` varchar(256) NULL
+                            ) ENGINE=OLAP
+                            DUPLICATE KEY(`name`)
+                            COMMENT 'OLAP'
+                            DISTRIBUTED BY RANDOM BUCKETS 10
+                            PROPERTIES (
+                            "replication_allocation" = "tag.location.default: 1",
+                            "is_being_synced" = "false",
+                            "storage_format" = "V2",
+                            "light_schema_change" = "true",
+                            "disable_auto_compaction" = "false",
+                            "enable_single_replica_compaction" = "false"
+                            ); 
+                        """
+    }
+
+    def tableName1 = "test_single_quotes"
+    try {
+        sql "DROP TABLE IF EXISTS ${tableName1}"
+
+        create_table.call(tableName1)
+
+        streamLoad {
+            table "${tableName1}"
+
+            set 'column_separator', ','
+            set 'trim_double_quotes', 'true'
+
+            file 'test_single_quote.csv'
+
+            check { result, exception, startTime, endTime ->
+                if (exception != null) {
+                    throw exception
+                }
+                log.info("Stream load result: ${result}".toString())
+                def json = parseJson(result)
+                assertEquals("success", json.Status.toLowerCase())
+                assertEquals(1, json.NumberTotalRows)
+                assertEquals(0, json.NumberFilteredRows)
+                assertEquals(0, json.NumberUnselectedRows)
+            }
+        }
+
+        qt_sql_test_single_quote "SELECT * FROM ${tableName1} order by name"
+
+    } finally {
+        sql "DROP TABLE IF EXISTS ${tableName1}"
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to