This is an automated email from the ASF dual-hosted git repository.

xudong963 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 06631c2531 feat: mapping sql Char/Text/String default to Utf8View 
(#16290)
06631c2531 is described below

commit 06631c25316ed2fc20ab8114d0dcc801f353fbad
Author: Qi Zhu <821684...@qq.com>
AuthorDate: Tue Jun 17 12:38:37 2025 +0800

    feat: mapping sql Char/Text/String default to Utf8View (#16290)
    
    * feat: mapping sql Char/Text/String default to Utf8View
    
    * Add support utf8view for sort merge join
    
    * fix binary utf8view union with int32
    
    * fix slt order
    
    * fix
    
    * fmt
    
    * Fix test
    
    * fix
    
    * fix test
    
    * fix
    
    * clean
    
    * Address comments
    
    * Fix test
    
    * support md5 for utf8view
---
 datafusion/common/src/config.rs                    |   8 +-
 datafusion/core/src/execution/session_state.rs     |   2 +-
 datafusion/core/tests/sql/create_drop.rs           |   2 +-
 .../user_defined/user_defined_scalar_functions.rs  |   2 +-
 datafusion/expr-common/src/type_coercion/binary.rs |   5 +-
 datafusion/functions/src/crypto/basic.rs           |   8 +-
 datafusion/functions/src/crypto/md5.rs             |   8 +-
 .../physical-plan/src/joins/sort_merge_join.rs     |   2 +
 datafusion/sql/src/planner.rs                      |  22 ++-
 datafusion/sql/tests/cases/params.rs               |  12 +-
 datafusion/sql/tests/sql_integration.rs            |   6 +-
 datafusion/sqllogictest/test_files/aggregate.slt   |   8 +-
 datafusion/sqllogictest/test_files/array.slt       |  26 +--
 datafusion/sqllogictest/test_files/arrow_files.slt |   5 +
 datafusion/sqllogictest/test_files/avro.slt        |   4 +-
 datafusion/sqllogictest/test_files/ddl.slt         |   4 +-
 .../sqllogictest/test_files/explain_tree.slt       | 194 +++++++++------------
 .../sqllogictest/test_files/information_schema.slt |   4 +-
 .../test_files/monotonic_projection_test.slt       |   4 +-
 .../test_files/parquet_filter_pushdown.slt         |  16 +-
 .../sqllogictest/test_files/push_down_filter.slt   |   2 +-
 datafusion/sqllogictest/test_files/scalar.slt      |   6 +-
 .../sqllogictest/test_files/simplify_expr.slt      |   8 +-
 datafusion/sqllogictest/test_files/union.slt       |   4 +-
 docs/source/user-guide/configs.md                  |   2 +-
 25 files changed, 170 insertions(+), 194 deletions(-)

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index 4efb67a37c..8324ce130a 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -259,10 +259,10 @@ config_namespace! {
         /// string length and thus DataFusion can not enforce such limits.
         pub support_varchar_with_length: bool, default = true
 
-       /// If true, `VARCHAR` is mapped to `Utf8View` during SQL planning.
-       /// If false, `VARCHAR` is mapped to `Utf8`  during SQL planning.
-       /// Default is false.
-        pub map_varchar_to_utf8view: bool, default = true
+        /// If true, string types (VARCHAR, CHAR, Text, and String) are mapped 
to `Utf8View` during SQL planning.
+        /// If false, they are mapped to `Utf8`.
+        /// Default is true.
+        pub map_string_types_to_utf8view: bool, default = true
 
         /// When set to true, the source locations relative to the original SQL
         /// query (i.e. 
[`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html))
 will be collected
diff --git a/datafusion/core/src/execution/session_state.rs 
b/datafusion/core/src/execution/session_state.rs
index edf116b00a..1c0363f421 100644
--- a/datafusion/core/src/execution/session_state.rs
+++ b/datafusion/core/src/execution/session_state.rs
@@ -494,7 +494,7 @@ impl SessionState {
             enable_options_value_normalization: sql_parser_options
                 .enable_options_value_normalization,
             support_varchar_with_length: 
sql_parser_options.support_varchar_with_length,
-            map_varchar_to_utf8view: 
sql_parser_options.map_varchar_to_utf8view,
+            map_string_types_to_utf8view: 
sql_parser_options.map_string_types_to_utf8view,
             collect_spans: sql_parser_options.collect_spans,
         }
     }
diff --git a/datafusion/core/tests/sql/create_drop.rs 
b/datafusion/core/tests/sql/create_drop.rs
index 83712053b9..b35e614a46 100644
--- a/datafusion/core/tests/sql/create_drop.rs
+++ b/datafusion/core/tests/sql/create_drop.rs
@@ -61,7 +61,7 @@ async fn create_external_table_with_ddl() -> Result<()> {
     assert_eq!(3, table_schema.fields().len());
 
     assert_eq!(&DataType::Int32, table_schema.field(0).data_type());
-    assert_eq!(&DataType::Utf8, table_schema.field(1).data_type());
+    assert_eq!(&DataType::Utf8View, table_schema.field(1).data_type());
     assert_eq!(&DataType::Boolean, table_schema.field(2).data_type());
 
     Ok(())
diff --git 
a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs 
b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
index d7dd65deab..a4e278e511 100644
--- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
@@ -1181,7 +1181,7 @@ async fn 
create_scalar_function_from_sql_statement_postgres_syntax() -> Result<(
                 quote_style: None,
                 span: Span::empty(),
             }),
-            data_type: DataType::Utf8,
+            data_type: DataType::Utf8View,
             default_expr: None,
         }]),
         return_type: Some(DataType::Int32),
diff --git a/datafusion/expr-common/src/type_coercion/binary.rs 
b/datafusion/expr-common/src/type_coercion/binary.rs
index d0fcda9733..955c28c42a 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -462,7 +462,7 @@ pub fn type_union_resolution(data_types: &[DataType]) -> 
Option<DataType> {
 
     // If all the data_types are null, return string
     if data_types.iter().all(|t| t == &DataType::Null) {
-        return Some(DataType::Utf8);
+        return Some(DataType::Utf8View);
     }
 
     // Ignore Nulls, if any data_type category is not the same, return None
@@ -1202,7 +1202,8 @@ pub fn string_coercion(lhs_type: &DataType, rhs_type: 
&DataType) -> Option<DataT
 fn numeric_string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> 
Option<DataType> {
     use arrow::datatypes::DataType::*;
     match (lhs_type, rhs_type) {
-        (Utf8 | LargeUtf8, other_type) | (other_type, Utf8 | LargeUtf8)
+        (Utf8 | LargeUtf8 | Utf8View, other_type)
+        | (other_type, Utf8 | LargeUtf8 | Utf8View)
             if other_type.is_numeric() =>
         {
             Some(other_type.clone())
diff --git a/datafusion/functions/src/crypto/basic.rs 
b/datafusion/functions/src/crypto/basic.rs
index eaa688c1c3..5bf83943a9 100644
--- a/datafusion/functions/src/crypto/basic.rs
+++ b/datafusion/functions/src/crypto/basic.rs
@@ -21,7 +21,7 @@ use arrow::array::{
     Array, ArrayRef, BinaryArray, BinaryArrayType, BinaryViewArray, 
GenericBinaryArray,
     OffsetSizeTrait,
 };
-use arrow::array::{AsArray, GenericStringArray, StringArray, StringViewArray};
+use arrow::array::{AsArray, GenericStringArray, StringViewArray};
 use arrow::datatypes::DataType;
 use blake2::{Blake2b512, Blake2s256, Digest};
 use blake3::Hasher as Blake3;
@@ -169,18 +169,18 @@ pub fn md5(args: &[ColumnarValue]) -> 
Result<ColumnarValue> {
     let [data] = take_function_args("md5", args)?;
     let value = digest_process(data, DigestAlgorithm::Md5)?;
 
-    // md5 requires special handling because of its unique utf8 return type
+    // md5 requires special handling because of its unique utf8view return type
     Ok(match value {
         ColumnarValue::Array(array) => {
             let binary_array = as_binary_array(&array)?;
-            let string_array: StringArray = binary_array
+            let string_array: StringViewArray = binary_array
                 .iter()
                 .map(|opt| opt.map(hex_encode::<_>))
                 .collect();
             ColumnarValue::Array(Arc::new(string_array))
         }
         ColumnarValue::Scalar(ScalarValue::Binary(opt)) => {
-            ColumnarValue::Scalar(ScalarValue::Utf8(opt.map(hex_encode::<_>)))
+            
ColumnarValue::Scalar(ScalarValue::Utf8View(opt.map(hex_encode::<_>)))
         }
         _ => return exec_err!("Impossibly got invalid results from digest"),
     })
diff --git a/datafusion/functions/src/crypto/md5.rs 
b/datafusion/functions/src/crypto/md5.rs
index c154045002..e209ed06e2 100644
--- a/datafusion/functions/src/crypto/md5.rs
+++ b/datafusion/functions/src/crypto/md5.rs
@@ -92,12 +92,12 @@ impl ScalarUDFImpl for Md5Func {
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
         use DataType::*;
         Ok(match &arg_types[0] {
-            LargeUtf8 | LargeBinary => Utf8,
-            Utf8View | Utf8 | Binary | BinaryView => Utf8,
+            LargeUtf8 | LargeBinary => Utf8View,
+            Utf8View | Utf8 | Binary | BinaryView => Utf8View,
             Null => Null,
             Dictionary(_, t) => match **t {
-                LargeUtf8 | LargeBinary => Utf8,
-                Utf8 | Binary | BinaryView => Utf8,
+                LargeUtf8 | LargeBinary => Utf8View,
+                Utf8 | Binary | BinaryView => Utf8View,
                 Null => Null,
                 _ => {
                     return plan_err!(
diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs 
b/datafusion/physical-plan/src/joins/sort_merge_join.rs
index f361992caa..6ab069aaf4 100644
--- a/datafusion/physical-plan/src/joins/sort_merge_join.rs
+++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs
@@ -2492,6 +2492,7 @@ fn compare_join_arrays(
             DataType::Float32 => compare_value!(Float32Array),
             DataType::Float64 => compare_value!(Float64Array),
             DataType::Utf8 => compare_value!(StringArray),
+            DataType::Utf8View => compare_value!(StringViewArray),
             DataType::LargeUtf8 => compare_value!(LargeStringArray),
             DataType::Decimal128(..) => compare_value!(Decimal128Array),
             DataType::Timestamp(time_unit, None) => match time_unit {
@@ -2559,6 +2560,7 @@ fn is_join_arrays_equal(
             DataType::Float32 => compare_value!(Float32Array),
             DataType::Float64 => compare_value!(Float64Array),
             DataType::Utf8 => compare_value!(StringArray),
+            DataType::Utf8View => compare_value!(StringViewArray),
             DataType::LargeUtf8 => compare_value!(LargeStringArray),
             DataType::Decimal128(..) => compare_value!(Decimal128Array),
             DataType::Timestamp(time_unit, None) => match time_unit {
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 5a1f3cdf69..03396822ec 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -52,8 +52,8 @@ pub struct ParserOptions {
     pub enable_options_value_normalization: bool,
     /// Whether to collect spans
     pub collect_spans: bool,
-    /// Whether `VARCHAR` is mapped to `Utf8View` during SQL planning.
-    pub map_varchar_to_utf8view: bool,
+    /// Whether string types (VARCHAR, CHAR, Text, and String) are mapped to 
`Utf8View` during SQL planning.
+    pub map_string_types_to_utf8view: bool,
 }
 
 impl ParserOptions {
@@ -72,7 +72,7 @@ impl ParserOptions {
             parse_float_as_decimal: false,
             enable_ident_normalization: true,
             support_varchar_with_length: true,
-            map_varchar_to_utf8view: true,
+            map_string_types_to_utf8view: true,
             enable_options_value_normalization: false,
             collect_spans: false,
         }
@@ -112,9 +112,9 @@ impl ParserOptions {
         self
     }
 
-    /// Sets the `map_varchar_to_utf8view` option.
-    pub fn with_map_varchar_to_utf8view(mut self, value: bool) -> Self {
-        self.map_varchar_to_utf8view = value;
+    /// Sets the `map_string_types_to_utf8view` option.
+    pub fn with_map_string_types_to_utf8view(mut self, value: bool) -> Self {
+        self.map_string_types_to_utf8view = value;
         self
     }
 
@@ -143,7 +143,7 @@ impl From<&SqlParserOptions> for ParserOptions {
             parse_float_as_decimal: options.parse_float_as_decimal,
             enable_ident_normalization: options.enable_ident_normalization,
             support_varchar_with_length: options.support_varchar_with_length,
-            map_varchar_to_utf8view: options.map_varchar_to_utf8view,
+            map_string_types_to_utf8view: options.map_string_types_to_utf8view,
             enable_options_value_normalization: options
                 .enable_options_value_normalization,
             collect_spans: options.collect_spans,
@@ -577,7 +577,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                     please set `support_varchar_with_length` to be true"
                     ),
                     _ => {
-                        if self.options.map_varchar_to_utf8view {
+                        if self.options.map_string_types_to_utf8view {
                             Ok(DataType::Utf8View)
                         } else {
                             Ok(DataType::Utf8)
@@ -601,7 +601,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 )
             }
             SQLDataType::Char(_) | SQLDataType::Text | SQLDataType::String(_) 
=> {
-                Ok(DataType::Utf8)
+                if self.options.map_string_types_to_utf8view {
+                    Ok(DataType::Utf8View)
+                } else {
+                    Ok(DataType::Utf8)
+                }
             }
             SQLDataType::Timestamp(precision, tz_info)
                 if precision.is_none() || [0, 3, 6, 
9].contains(&precision.unwrap()) =>
diff --git a/datafusion/sql/tests/cases/params.rs 
b/datafusion/sql/tests/cases/params.rs
index b3cc49c310..15e7d923a9 100644
--- a/datafusion/sql/tests/cases/params.rs
+++ b/datafusion/sql/tests/cases/params.rs
@@ -746,31 +746,31 @@ fn test_prepare_statement_to_plan_multi_params() {
     assert_snapshot!(
         plan,
         @r#"
-    Prepare: "my_plan" [Int32, Utf8, Float64, Int32, Float64, Utf8]
+    Prepare: "my_plan" [Int32, Utf8View, Float64, Int32, Float64, Utf8View]
       Projection: person.id, person.age, $6
         Filter: person.age IN ([$1, $4]) AND person.salary > $3 AND 
person.salary < $5 OR person.first_name < $2
           TableScan: person
     "#
     );
-    assert_snapshot!(dt, @r#"[Int32, Utf8, Float64, Int32, Float64, Utf8]"#);
+    assert_snapshot!(dt, @r#"[Int32, Utf8View, Float64, Int32, Float64, 
Utf8View]"#);
 
     ///////////////////
     // replace params with values
     let param_values = vec![
         ScalarValue::Int32(Some(10)),
-        ScalarValue::from("abc"),
+        ScalarValue::Utf8View(Some("abc".into())),
         ScalarValue::Float64(Some(100.0)),
         ScalarValue::Int32(Some(20)),
         ScalarValue::Float64(Some(200.0)),
-        ScalarValue::from("xyz"),
+        ScalarValue::Utf8View(Some("xyz".into())),
     ];
 
     let plan_with_params = plan.with_param_values(param_values).unwrap();
     assert_snapshot!(
         plan_with_params,
         @r#"
-    Projection: person.id, person.age, Utf8("xyz") AS $6
-      Filter: person.age IN ([Int32(10), Int32(20)]) AND person.salary > 
Float64(100) AND person.salary < Float64(200) OR person.first_name < Utf8("abc")
+    Projection: person.id, person.age, Utf8View("xyz") AS $6
+      Filter: person.age IN ([Int32(10), Int32(20)]) AND person.salary > 
Float64(100) AND person.salary < Float64(200) OR person.first_name < 
Utf8View("abc")
         TableScan: person
     "#
     );
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index 4be7953aef..c82239d9b4 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -3355,7 +3355,7 @@ fn parse_decimals_parser_options() -> ParserOptions {
         parse_float_as_decimal: true,
         enable_ident_normalization: false,
         support_varchar_with_length: false,
-        map_varchar_to_utf8view: true,
+        map_string_types_to_utf8view: true,
         enable_options_value_normalization: false,
         collect_spans: false,
     }
@@ -3366,7 +3366,7 @@ fn 
ident_normalization_parser_options_no_ident_normalization() -> ParserOptions
         parse_float_as_decimal: true,
         enable_ident_normalization: false,
         support_varchar_with_length: false,
-        map_varchar_to_utf8view: true,
+        map_string_types_to_utf8view: true,
         enable_options_value_normalization: false,
         collect_spans: false,
     }
@@ -3377,7 +3377,7 @@ fn 
ident_normalization_parser_options_ident_normalization() -> ParserOptions {
         parse_float_as_decimal: true,
         enable_ident_normalization: true,
         support_varchar_with_length: false,
-        map_varchar_to_utf8view: true,
+        map_string_types_to_utf8view: true,
         enable_options_value_normalization: false,
         collect_spans: false,
     }
diff --git a/datafusion/sqllogictest/test_files/aggregate.slt 
b/datafusion/sqllogictest/test_files/aggregate.slt
index bd9d8b1f43..f9dc872a3c 100644
--- a/datafusion/sqllogictest/test_files/aggregate.slt
+++ b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -2278,10 +2278,10 @@ create table t (c string) as values
 query T
 select arrow_typeof(c) from t;
 ----
-Utf8
-Utf8
-Utf8
-Utf8
+Utf8View
+Utf8View
+Utf8View
+Utf8View
 
 query IT
 select count(c), arrow_typeof(count(c)) from t;
diff --git a/datafusion/sqllogictest/test_files/array.slt 
b/datafusion/sqllogictest/test_files/array.slt
index 0139daecca..a2640fa988 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -6073,7 +6073,7 @@ logical_plan
 03)----SubqueryAlias: test
 04)------SubqueryAlias: t
 05)--------Projection:
-06)----------Filter: substr(CAST(md5(CAST(tmp_table.value AS Utf8)) AS Utf8), 
Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), 
Utf8View("a"), Utf8View("b"), Utf8View("c")])
+06)----------Filter: substr(CAST(md5(CAST(tmp_table.value AS Utf8View)) AS 
Utf8View), Int64(1), Int64(32)) IN 
([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), 
Utf8View("c")])
 07)------------TableScan: tmp_table projection=[value]
 physical_plan
 01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
@@ -6082,7 +6082,7 @@ physical_plan
 04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
 05)--------ProjectionExec: expr=[]
 06)----------CoalesceBatchesExec: target_batch_size=8192
-07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8)), 1, 32) IN 
([Literal { value: Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), field: Field { 
name: "7f4b18de3cfeb9b4ac78c381ee2ad278", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("a"), field: Field { name: "a", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("b"), field: Field { name: "b" [...]
+07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN 
([Literal { value: Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), field: Field { 
name: "7f4b18de3cfeb9b4ac78c381ee2ad278", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("a"), field: Field { name: "a", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("b"), field: Field { name: [...]
 08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
 09)----------------LazyMemoryExec: partitions=1, 
batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
 
@@ -6102,7 +6102,7 @@ logical_plan
 03)----SubqueryAlias: test
 04)------SubqueryAlias: t
 05)--------Projection:
-06)----------Filter: substr(CAST(md5(CAST(tmp_table.value AS Utf8)) AS Utf8), 
Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), 
Utf8View("a"), Utf8View("b"), Utf8View("c")])
+06)----------Filter: substr(CAST(md5(CAST(tmp_table.value AS Utf8View)) AS 
Utf8View), Int64(1), Int64(32)) IN 
([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), 
Utf8View("c")])
 07)------------TableScan: tmp_table projection=[value]
 physical_plan
 01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
@@ -6111,7 +6111,7 @@ physical_plan
 04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
 05)--------ProjectionExec: expr=[]
 06)----------CoalesceBatchesExec: target_batch_size=8192
-07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8)), 1, 32) IN 
([Literal { value: Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), field: Field { 
name: "7f4b18de3cfeb9b4ac78c381ee2ad278", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("a"), field: Field { name: "a", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("b"), field: Field { name: "b" [...]
+07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN 
([Literal { value: Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), field: Field { 
name: "7f4b18de3cfeb9b4ac78c381ee2ad278", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("a"), field: Field { name: "a", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("b"), field: Field { name: [...]
 08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
 09)----------------LazyMemoryExec: partitions=1, 
batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
 
@@ -6131,7 +6131,7 @@ logical_plan
 03)----SubqueryAlias: test
 04)------SubqueryAlias: t
 05)--------Projection:
-06)----------Filter: substr(CAST(md5(CAST(tmp_table.value AS Utf8)) AS Utf8), 
Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), 
Utf8View("a"), Utf8View("b"), Utf8View("c")])
+06)----------Filter: substr(CAST(md5(CAST(tmp_table.value AS Utf8View)) AS 
Utf8View), Int64(1), Int64(32)) IN 
([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), 
Utf8View("c")])
 07)------------TableScan: tmp_table projection=[value]
 physical_plan
 01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
@@ -6140,7 +6140,7 @@ physical_plan
 04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
 05)--------ProjectionExec: expr=[]
 06)----------CoalesceBatchesExec: target_batch_size=8192
-07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8)), 1, 32) IN 
([Literal { value: Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), field: Field { 
name: "7f4b18de3cfeb9b4ac78c381ee2ad278", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("a"), field: Field { name: "a", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("b"), field: Field { name: "b" [...]
+07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN 
([Literal { value: Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), field: Field { 
name: "7f4b18de3cfeb9b4ac78c381ee2ad278", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("a"), field: Field { name: "a", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("b"), field: Field { name: [...]
 08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
 09)----------------LazyMemoryExec: partitions=1, 
batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
 
@@ -6162,7 +6162,7 @@ logical_plan
 03)----SubqueryAlias: test
 04)------SubqueryAlias: t
 05)--------Projection:
-06)----------Filter: array_has(LargeList([7f4b18de3cfeb9b4ac78c381ee2ad278, a, 
b, c]), substr(CAST(md5(CAST(tmp_table.value AS Utf8)) AS Utf8), Int64(1), 
Int64(32)))
+06)----------Filter: array_has(LargeList([7f4b18de3cfeb9b4ac78c381ee2ad278, a, 
b, c]), substr(CAST(md5(CAST(tmp_table.value AS Utf8View)) AS Utf8View), 
Int64(1), Int64(32)))
 07)------------TableScan: tmp_table projection=[value]
 physical_plan
 01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
@@ -6171,7 +6171,7 @@ physical_plan
 04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
 05)--------ProjectionExec: expr=[]
 06)----------CoalesceBatchesExec: target_batch_size=8192
-07)------------FilterExec: array_has([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, 
c], substr(md5(CAST(value@0 AS Utf8)), 1, 32))
+07)------------FilterExec: array_has([7f4b18de3cfeb9b4ac78c381ee2ad278, a, b, 
c], substr(md5(CAST(value@0 AS Utf8View)), 1, 32))
 08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
 09)----------------LazyMemoryExec: partitions=1, 
batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
 
@@ -6191,7 +6191,7 @@ logical_plan
 03)----SubqueryAlias: test
 04)------SubqueryAlias: t
 05)--------Projection:
-06)----------Filter: substr(CAST(md5(CAST(tmp_table.value AS Utf8)) AS Utf8), 
Int64(1), Int64(32)) IN ([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), 
Utf8View("a"), Utf8View("b"), Utf8View("c")])
+06)----------Filter: substr(CAST(md5(CAST(tmp_table.value AS Utf8View)) AS 
Utf8View), Int64(1), Int64(32)) IN 
([Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), Utf8View("a"), Utf8View("b"), 
Utf8View("c")])
 07)------------TableScan: tmp_table projection=[value]
 physical_plan
 01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
@@ -6200,7 +6200,7 @@ physical_plan
 04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
 05)--------ProjectionExec: expr=[]
 06)----------CoalesceBatchesExec: target_batch_size=8192
-07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8)), 1, 32) IN 
([Literal { value: Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), field: Field { 
name: "7f4b18de3cfeb9b4ac78c381ee2ad278", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("a"), field: Field { name: "a", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("b"), field: Field { name: "b" [...]
+07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IN 
([Literal { value: Utf8View("7f4b18de3cfeb9b4ac78c381ee2ad278"), field: Field { 
name: "7f4b18de3cfeb9b4ac78c381ee2ad278", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("a"), field: Field { name: "a", data_type: Utf8View, nullable: false, 
dict_id: 0, dict_is_ordered: false, metadata: {} } }, Literal { value: 
Utf8View("b"), field: Field { name: [...]
 08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
 09)----------------LazyMemoryExec: partitions=1, 
batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
 
@@ -6222,7 +6222,7 @@ logical_plan
 03)----SubqueryAlias: test
 04)------SubqueryAlias: t
 05)--------Projection:
-06)----------Filter: substr(CAST(md5(CAST(tmp_table.value AS Utf8)) AS Utf8), 
Int64(1), Int64(32)) IS NOT NULL OR Boolean(NULL)
+06)----------Filter: substr(CAST(md5(CAST(tmp_table.value AS Utf8View)) AS 
Utf8View), Int64(1), Int64(32)) IS NOT NULL OR Boolean(NULL)
 07)------------TableScan: tmp_table projection=[value]
 physical_plan
 01)ProjectionExec: expr=[count(Int64(1))@0 as count(*)]
@@ -6231,7 +6231,7 @@ physical_plan
 04)------AggregateExec: mode=Partial, gby=[], aggr=[count(Int64(1))]
 05)--------ProjectionExec: expr=[]
 06)----------CoalesceBatchesExec: target_batch_size=8192
-07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8)), 1, 32) IS NOT 
NULL OR NULL
+07)------------FilterExec: substr(md5(CAST(value@0 AS Utf8View)), 1, 32) IS 
NOT NULL OR NULL
 08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), 
input_partitions=1
 09)----------------LazyMemoryExec: partitions=1, 
batch_generators=[generate_series: start=1, end=100000, batch_size=8192]
 
@@ -7913,7 +7913,7 @@ List(Field { name: "item", data_type: List(Field { name: 
"item", data_type: Int3
 query ??T
 select [1,2,3]::int[], [['1']]::int[][], arrow_typeof([]::text[]);
 ----
-[1, 2, 3] [[1]] List(Field { name: "item", data_type: Utf8, nullable: true, 
dict_id: 0, dict_is_ordered: false, metadata: {} })
+[1, 2, 3] [[1]] List(Field { name: "item", data_type: Utf8View, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} })
 
 # test empty arrays return length
 # issue: https://github.com/apache/datafusion/pull/12459
diff --git a/datafusion/sqllogictest/test_files/arrow_files.slt 
b/datafusion/sqllogictest/test_files/arrow_files.slt
index 30f322cf98..62453ec4bf 100644
--- a/datafusion/sqllogictest/test_files/arrow_files.slt
+++ b/datafusion/sqllogictest/test_files/arrow_files.slt
@@ -19,6 +19,11 @@
 ## Arrow Files Format support
 #############
 
+# We use a fixed Arrow file for these sqllogictests, and that file was written
+# with the arrow-ipc Utf8 type, so decoding it also loads Utf8.
+# Therefore, we disable map_string_types_to_utf8view here.
+statement ok
+set datafusion.sql_parser.map_string_types_to_utf8view = false;
 
 statement ok
 
diff --git a/datafusion/sqllogictest/test_files/avro.slt 
b/datafusion/sqllogictest/test_files/avro.slt
index 4573af1d59..2ad60c0082 100644
--- a/datafusion/sqllogictest/test_files/avro.slt
+++ b/datafusion/sqllogictest/test_files/avro.slt
@@ -15,10 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# Currently, the avro not support Utf8View type, so we disable the 
map_varchar_to_utf8view
+# Currently, Avro does not support the Utf8View type, so we disable the 
map_string_types_to_utf8view
 # After https://github.com/apache/arrow-rs/issues/7262 released, we can remove 
this setting
 statement ok
-set datafusion.sql_parser.map_varchar_to_utf8view = false;
+set datafusion.sql_parser.map_string_types_to_utf8view = false;
 
 statement ok
 CREATE EXTERNAL TABLE alltypes_plain (
diff --git a/datafusion/sqllogictest/test_files/ddl.slt 
b/datafusion/sqllogictest/test_files/ddl.slt
index 1e95e426f3..81f2955eff 100644
--- a/datafusion/sqllogictest/test_files/ddl.slt
+++ b/datafusion/sqllogictest/test_files/ddl.slt
@@ -828,7 +828,7 @@ drop table table_with_pk;
 statement ok
 set datafusion.catalog.information_schema = false;
 
-# Test VARCHAR is mapped to Utf8View during SQL planning when setting 
map_varchar_to_utf8view to true
+# Test VARCHAR is mapped to Utf8View during SQL planning when setting 
map_string_types_to_utf8view to true
 statement ok
 CREATE TABLE t1(c1 VARCHAR(10) NOT NULL, c2 VARCHAR);
 
@@ -839,7 +839,7 @@ c1 Utf8View NO
 c2 Utf8View YES
 
 statement ok
-set datafusion.sql_parser.map_varchar_to_utf8view = true;
+set datafusion.sql_parser.map_string_types_to_utf8view = true;
 
 statement ok
 CREATE TABLE t2(c1 VARCHAR(10) NOT NULL, c2 VARCHAR);
diff --git a/datafusion/sqllogictest/test_files/explain_tree.slt 
b/datafusion/sqllogictest/test_files/explain_tree.slt
index 15bf615765..8096c8cacf 100644
--- a/datafusion/sqllogictest/test_files/explain_tree.slt
+++ b/datafusion/sqllogictest/test_files/explain_tree.slt
@@ -280,7 +280,7 @@ physical_plan
 06)┌─────────────┴─────────────┐
 07)│       DataSourceExec      │
 08)│    --------------------   │
-09)│        bytes: 3120        │
+09)│        bytes: 1072        │
 10)│       format: memory      │
 11)│          rows: 2          │
 12)└───────────────────────────┘
@@ -367,7 +367,7 @@ physical_plan
 21)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
 22)│       DataSourceExec      ││    CoalesceBatchesExec    │
 23)│    --------------------   ││    --------------------   │
-24)│        bytes: 1560        ││     target_batch_size:    │
+24)│         bytes: 536        ││     target_batch_size:    │
 25)│       format: memory      ││            8192           │
 26)│          rows: 1          ││                           │
 27)└───────────────────────────┘└─────────────┬─────────────┘
@@ -669,7 +669,7 @@ physical_plan
 13)┌─────────────┴─────────────┐
 14)│       DataSourceExec      │
 15)│    --------------------   │
-16)│        bytes: 1560        │
+16)│         bytes: 536        │
 17)│       format: memory      │
 18)│          rows: 1          │
 19)└───────────────────────────┘
@@ -1065,7 +1065,7 @@ physical_plan
 13)┌─────────────┴─────────────┐
 14)│       DataSourceExec      │
 15)│    --------------------   │
-16)│        bytes: 1560        │
+16)│         bytes: 536        │
 17)│       format: memory      │
 18)│          rows: 1          │
 19)└───────────────────────────┘
@@ -1195,60 +1195,42 @@ physical_plan
 08)│        HashJoinExec       │
 09)│    --------------------   │
 10)│            on:            │
-11)│ (int_col = int_col), (CAST├──────────────┐
-12)│   (table1.string_col AS   │              │
-13)│         Utf8View) =       │              │
-14)│         string_col)       │              │
-15)└─────────────┬─────────────┘              │
-16)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-17)│    CoalesceBatchesExec    ││    CoalesceBatchesExec    │
-18)│    --------------------   ││    --------------------   │
-19)│     target_batch_size:    ││     target_batch_size:    │
-20)│            8192           ││            8192           │
-21)└─────────────┬─────────────┘└─────────────┬─────────────┘
-22)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-23)│      RepartitionExec      ││      RepartitionExec      │
-24)│    --------------------   ││    --------------------   │
-25)│ partition_count(in->out): ││ partition_count(in->out): │
-26)│           4 -> 4          ││           4 -> 4          │
-27)│                           ││                           │
-28)│    partitioning_scheme:   ││    partitioning_scheme:   │
-29)│   Hash([int_col@0, CAST   ││      Hash([int_col@0,     │
-30)│     (table1.string_col    ││       string_col@1],      │
-31)│     AS Utf8View)@4], 4)   ││             4)            │
-32)└─────────────┬─────────────┘└─────────────┬─────────────┘
-33)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-34)│       ProjectionExec      ││      RepartitionExec      │
-35)│    --------------------   ││    --------------------   │
-36)│ CAST(table1.string_col AS ││ partition_count(in->out): │
-37)│         Utf8View):        ││           1 -> 4          │
-38)│     CAST(string_col AS    ││                           │
-39)│          Utf8View)        ││    partitioning_scheme:   │
-40)│                           ││     RoundRobinBatch(4)    │
-41)│        bigint_col:        ││                           │
-42)│         bigint_col        ││                           │
-43)│                           ││                           │
-44)│     date_col: date_col    ││                           │
-45)│      int_col: int_col     ││                           │
-46)│                           ││                           │
-47)│        string_col:        ││                           │
-48)│         string_col        ││                           │
-49)└─────────────┬─────────────┘└─────────────┬─────────────┘
-50)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-51)│      RepartitionExec      ││       DataSourceExec      │
-52)│    --------------------   ││    --------------------   │
-53)│ partition_count(in->out): ││          files: 1         │
-54)│           1 -> 4          ││      format: parquet      │
-55)│                           ││                           │
-56)│    partitioning_scheme:   ││                           │
-57)│     RoundRobinBatch(4)    ││                           │
-58)└─────────────┬─────────────┘└───────────────────────────┘
-59)┌─────────────┴─────────────┐
-60)│       DataSourceExec      │
-61)│    --------------------   │
-62)│          files: 1         │
-63)│        format: csv        │
-64)└───────────────────────────┘
+11)│   (int_col = int_col),    ├──────────────┐
+12)│       (string_col =       │              │
+13)│         string_col)       │              │
+14)└─────────────┬─────────────┘              │
+15)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+16)│    CoalesceBatchesExec    ││    CoalesceBatchesExec    │
+17)│    --------------------   ││    --------------------   │
+18)│     target_batch_size:    ││     target_batch_size:    │
+19)│            8192           ││            8192           │
+20)└─────────────┬─────────────┘└─────────────┬─────────────┘
+21)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+22)│      RepartitionExec      ││      RepartitionExec      │
+23)│    --------------------   ││    --------------------   │
+24)│ partition_count(in->out): ││ partition_count(in->out): │
+25)│           4 -> 4          ││           4 -> 4          │
+26)│                           ││                           │
+27)│    partitioning_scheme:   ││    partitioning_scheme:   │
+28)│      Hash([int_col@0,     ││      Hash([int_col@0,     │
+29)│       string_col@1],      ││       string_col@1],      │
+30)│             4)            ││             4)            │
+31)└─────────────┬─────────────┘└─────────────┬─────────────┘
+32)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+33)│      RepartitionExec      ││      RepartitionExec      │
+34)│    --------------------   ││    --------------------   │
+35)│ partition_count(in->out): ││ partition_count(in->out): │
+36)│           1 -> 4          ││           1 -> 4          │
+37)│                           ││                           │
+38)│    partitioning_scheme:   ││    partitioning_scheme:   │
+39)│     RoundRobinBatch(4)    ││     RoundRobinBatch(4)    │
+40)└─────────────┬─────────────┘└─────────────┬─────────────┘
+41)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+42)│       DataSourceExec      ││       DataSourceExec      │
+43)│    --------------------   ││    --------------------   │
+44)│          files: 1         ││          files: 1         │
+45)│        format: csv        ││      format: parquet      │
+46)└───────────────────────────┘└───────────────────────────┘
 
 # Query with outer hash join.
 query TT
@@ -1267,60 +1249,42 @@ physical_plan
 10)│      join_type: Left      │
 11)│                           │
 12)│            on:            ├──────────────┐
-13)│ (int_col = int_col), (CAST│              │
-14)│   (table1.string_col AS   │              │
-15)│         Utf8View) =       │              │
-16)│         string_col)       │              │
-17)└─────────────┬─────────────┘              │
-18)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-19)│    CoalesceBatchesExec    ││    CoalesceBatchesExec    │
-20)│    --------------------   ││    --------------------   │
-21)│     target_batch_size:    ││     target_batch_size:    │
-22)│            8192           ││            8192           │
-23)└─────────────┬─────────────┘└─────────────┬─────────────┘
-24)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-25)│      RepartitionExec      ││      RepartitionExec      │
-26)│    --------------------   ││    --------------------   │
-27)│ partition_count(in->out): ││ partition_count(in->out): │
-28)│           4 -> 4          ││           4 -> 4          │
-29)│                           ││                           │
-30)│    partitioning_scheme:   ││    partitioning_scheme:   │
-31)│   Hash([int_col@0, CAST   ││      Hash([int_col@0,     │
-32)│     (table1.string_col    ││       string_col@1],      │
-33)│     AS Utf8View)@4], 4)   ││             4)            │
-34)└─────────────┬─────────────┘└─────────────┬─────────────┘
-35)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-36)│       ProjectionExec      ││      RepartitionExec      │
-37)│    --------------------   ││    --------------------   │
-38)│ CAST(table1.string_col AS ││ partition_count(in->out): │
-39)│         Utf8View):        ││           1 -> 4          │
-40)│     CAST(string_col AS    ││                           │
-41)│          Utf8View)        ││    partitioning_scheme:   │
-42)│                           ││     RoundRobinBatch(4)    │
-43)│        bigint_col:        ││                           │
-44)│         bigint_col        ││                           │
-45)│                           ││                           │
-46)│     date_col: date_col    ││                           │
-47)│      int_col: int_col     ││                           │
-48)│                           ││                           │
-49)│        string_col:        ││                           │
-50)│         string_col        ││                           │
-51)└─────────────┬─────────────┘└─────────────┬─────────────┘
-52)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
-53)│      RepartitionExec      ││       DataSourceExec      │
-54)│    --------------------   ││    --------------------   │
-55)│ partition_count(in->out): ││          files: 1         │
-56)│           1 -> 4          ││      format: parquet      │
-57)│                           ││                           │
-58)│    partitioning_scheme:   ││                           │
-59)│     RoundRobinBatch(4)    ││                           │
-60)└─────────────┬─────────────┘└───────────────────────────┘
-61)┌─────────────┴─────────────┐
-62)│       DataSourceExec      │
-63)│    --------------------   │
-64)│          files: 1         │
-65)│        format: csv        │
-66)└───────────────────────────┘
+13)│   (int_col = int_col),    │              │
+14)│       (string_col =       │              │
+15)│         string_col)       │              │
+16)└─────────────┬─────────────┘              │
+17)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+18)│    CoalesceBatchesExec    ││    CoalesceBatchesExec    │
+19)│    --------------------   ││    --------------------   │
+20)│     target_batch_size:    ││     target_batch_size:    │
+21)│            8192           ││            8192           │
+22)└─────────────┬─────────────┘└─────────────┬─────────────┘
+23)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+24)│      RepartitionExec      ││      RepartitionExec      │
+25)│    --------------------   ││    --------------------   │
+26)│ partition_count(in->out): ││ partition_count(in->out): │
+27)│           4 -> 4          ││           4 -> 4          │
+28)│                           ││                           │
+29)│    partitioning_scheme:   ││    partitioning_scheme:   │
+30)│      Hash([int_col@0,     ││      Hash([int_col@0,     │
+31)│       string_col@1],      ││       string_col@1],      │
+32)│             4)            ││             4)            │
+33)└─────────────┬─────────────┘└─────────────┬─────────────┘
+34)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+35)│      RepartitionExec      ││      RepartitionExec      │
+36)│    --------------------   ││    --------------------   │
+37)│ partition_count(in->out): ││ partition_count(in->out): │
+38)│           1 -> 4          ││           1 -> 4          │
+39)│                           ││                           │
+40)│    partitioning_scheme:   ││    partitioning_scheme:   │
+41)│     RoundRobinBatch(4)    ││     RoundRobinBatch(4)    │
+42)└─────────────┬─────────────┘└─────────────┬─────────────┘
+43)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
+44)│       DataSourceExec      ││       DataSourceExec      │
+45)│    --------------------   ││    --------------------   │
+46)│          files: 1         ││          files: 1         │
+47)│        format: csv        ││      format: parquet      │
+48)└───────────────────────────┘└───────────────────────────┘
 
 # Query with nested loop join.
 query TT
@@ -1529,7 +1493,7 @@ physical_plan
 57)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
 58)│       DataSourceExec      ││       DataSourceExec      │
 59)│    --------------------   ││    --------------------   │
-60)│        bytes: 1320        ││        bytes: 1312        │
+60)│         bytes: 296        ││         bytes: 288        │
 61)│       format: memory      ││       format: memory      │
 62)│          rows: 1          ││          rows: 1          │
 63)└───────────────────────────┘└───────────────────────────┘
@@ -1548,14 +1512,14 @@ physical_plan
 04)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
 05)│       DataSourceExec      ││       ProjectionExec      │
 06)│    --------------------   ││    --------------------   │
-07)│        bytes: 1320        ││   id: CAST(id AS Int32)   │
+07)│         bytes: 296        ││   id: CAST(id AS Int32)   │
 08)│       format: memory      ││         name: name        │
 09)│          rows: 1          ││                           │
 10)└───────────────────────────┘└─────────────┬─────────────┘
 11)-----------------------------┌─────────────┴─────────────┐
 12)-----------------------------│       DataSourceExec      │
 13)-----------------------------│    --------------------   │
-14)-----------------------------│        bytes: 1312        │
+14)-----------------------------│         bytes: 288        │
 15)-----------------------------│       format: memory      │
 16)-----------------------------│          rows: 1          │
 17)-----------------------------└───────────────────────────┘
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt 
b/datafusion/sqllogictest/test_files/information_schema.slt
index dc8b7680d8..f8c86df024 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -308,7 +308,7 @@ datafusion.sql_parser.collect_spans false
 datafusion.sql_parser.dialect generic
 datafusion.sql_parser.enable_ident_normalization true
 datafusion.sql_parser.enable_options_value_normalization false
-datafusion.sql_parser.map_varchar_to_utf8view true
+datafusion.sql_parser.map_string_types_to_utf8view true
 datafusion.sql_parser.parse_float_as_decimal false
 datafusion.sql_parser.recursion_limit 50
 datafusion.sql_parser.support_varchar_with_length true
@@ -419,7 +419,7 @@ datafusion.sql_parser.collect_spans false When set to true, 
the source locations
 datafusion.sql_parser.dialect generic Configure the SQL dialect used by 
DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, 
Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB 
and Databricks.
 datafusion.sql_parser.enable_ident_normalization true When set to true, SQL 
parser will normalize ident (convert ident to lowercase when not quoted)
 datafusion.sql_parser.enable_options_value_normalization false When set to 
true, SQL parser will normalize options value (convert value to lowercase). 
Note that this option is ignored and will be removed in the future. All 
case-insensitive values are normalized automatically.
-datafusion.sql_parser.map_varchar_to_utf8view true If true, `VARCHAR` is 
mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to 
`Utf8`  during SQL planning. Default is false.
+datafusion.sql_parser.map_string_types_to_utf8view true If true, string types 
(VARCHAR, CHAR, Text, and String) are mapped to `Utf8View` during SQL planning. 
If false, they are mapped to `Utf8`. Default is true.
 datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL 
parser will parse float as decimal type
 datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit 
when parsing complex SQL Queries
 datafusion.sql_parser.support_varchar_with_length true If true, permit lengths 
for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if 
a `VARCHAR` with a length is specified. The Arrow type system does not have a 
notion of maximum string length and thus DataFusion can not enforce such limits.
diff --git a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt 
b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt
index e8700b1fea..9c806cfa0d 100644
--- a/datafusion/sqllogictest/test_files/monotonic_projection_test.slt
+++ b/datafusion/sqllogictest/test_files/monotonic_projection_test.slt
@@ -129,12 +129,12 @@ ORDER BY a_str ASC, b ASC;
 ----
 logical_plan
 01)Sort: a_str ASC NULLS LAST, multiple_ordered_table.b ASC NULLS LAST
-02)--Projection: CAST(multiple_ordered_table.a AS Utf8) AS a_str, 
multiple_ordered_table.b
+02)--Projection: CAST(multiple_ordered_table.a AS Utf8View) AS a_str, 
multiple_ordered_table.b
 03)----TableScan: multiple_ordered_table projection=[a, b]
 physical_plan
 01)SortPreservingMergeExec: [a_str@0 ASC NULLS LAST, b@1 ASC NULLS LAST]
 02)--SortExec: expr=[a_str@0 ASC NULLS LAST, b@1 ASC NULLS LAST], 
preserve_partitioning=[true]
-03)----ProjectionExec: expr=[CAST(a@0 AS Utf8) as a_str, b@1 as b]
+03)----ProjectionExec: expr=[CAST(a@0 AS Utf8View) as a_str, b@1 as b]
 04)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 05)--------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], 
output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], file_type=csv, 
has_header=true
 
diff --git a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt 
b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt
index e5b5f5ac87..5c0419b69d 100644
--- a/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt
+++ b/datafusion/sqllogictest/test_files/parquet_filter_pushdown.slt
@@ -219,7 +219,7 @@ physical_plan
 query TT
 EXPLAIN select * from t_pushdown where part != 'a';
 ----
-logical_plan TableScan: t_pushdown projection=[val, part], 
full_filters=[t_pushdown.part != Utf8("a")]
+logical_plan TableScan: t_pushdown projection=[val, part], 
full_filters=[t_pushdown.part != Utf8View("a")]
 physical_plan DataSourceExec: file_groups={2 groups: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_part_test/part=b/file.parquet],
 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_part_test/part=c/file.parquet]]},
 projection=[val, part], file_type=parquet
 
 # And if we reference only a file column it gets pushed down
@@ -227,8 +227,8 @@ query TT
 EXPLAIN select * from t_pushdown where val != 'c';
 ----
 logical_plan
-01)Filter: t_pushdown.val != Utf8("c")
-02)--TableScan: t_pushdown projection=[val, part], 
partial_filters=[t_pushdown.val != Utf8("c")]
+01)Filter: t_pushdown.val != Utf8View("c")
+02)--TableScan: t_pushdown projection=[val, part], 
partial_filters=[t_pushdown.val != Utf8View("c")]
 physical_plan DataSourceExec: file_groups={3 groups: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_part_test/part=a/file.parquet],
 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_part_test/part=b/file.parquet],
 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_filter_pushdown/parquet_part_test/part=c/file.parquet]]},
 projection=[val, part], file_type=parquet, predicate=val@0 != c, pr [...]
 
 # If we have a mix of filters:
@@ -239,8 +239,8 @@ query TT
 EXPLAIN select * from t_pushdown where val != 'd' AND val != 'c' AND part = 
'a' AND part != val;
 ----
 logical_plan
-01)Filter: t_pushdown.val != Utf8("d") AND t_pushdown.val != Utf8("c") AND 
t_pushdown.val != t_pushdown.part
-02)--TableScan: t_pushdown projection=[val, part], 
full_filters=[t_pushdown.part = Utf8("a")], partial_filters=[t_pushdown.val != 
Utf8("d"), t_pushdown.val != Utf8("c"), t_pushdown.val != t_pushdown.part]
+01)Filter: t_pushdown.val != Utf8View("d") AND t_pushdown.val != Utf8View("c") 
AND t_pushdown.val != t_pushdown.part
+02)--TableScan: t_pushdown projection=[val, part], 
full_filters=[t_pushdown.part = Utf8View("a")], partial_filters=[t_pushdown.val 
!= Utf8View("d"), t_pushdown.val != Utf8View("c"), t_pushdown.val != 
t_pushdown.part]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=8192
 02)--FilterExec: val@0 != part@1
@@ -253,7 +253,7 @@ EXPLAIN select val, part from t_pushdown where part = 'a' 
AND part = val;
 ----
 logical_plan
 01)Filter: t_pushdown.val = t_pushdown.part
-02)--TableScan: t_pushdown projection=[val, part], 
full_filters=[t_pushdown.part = Utf8("a")], partial_filters=[t_pushdown.val = 
t_pushdown.part]
+02)--TableScan: t_pushdown projection=[val, part], 
full_filters=[t_pushdown.part = Utf8View("a")], partial_filters=[t_pushdown.val 
= t_pushdown.part]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=8192
 02)--FilterExec: val@0 = part@1
@@ -270,7 +270,7 @@ EXPLAIN select val, part from t_pushdown where part = val 
AND part = 'a';
 ----
 logical_plan
 01)Filter: t_pushdown.val = t_pushdown.part
-02)--TableScan: t_pushdown projection=[val, part], 
full_filters=[t_pushdown.part = Utf8("a")], partial_filters=[t_pushdown.val = 
t_pushdown.part]
+02)--TableScan: t_pushdown projection=[val, part], 
full_filters=[t_pushdown.part = Utf8View("a")], partial_filters=[t_pushdown.val 
= t_pushdown.part]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=8192
 02)--FilterExec: val@0 = part@1
@@ -280,4 +280,4 @@ physical_plan
 query TT
 select val, part from t_pushdown where part = val AND part = 'a';
 ----
-a a
\ No newline at end of file
+a a
diff --git a/datafusion/sqllogictest/test_files/push_down_filter.slt 
b/datafusion/sqllogictest/test_files/push_down_filter.slt
index ed948dd114..a0d3193324 100644
--- a/datafusion/sqllogictest/test_files/push_down_filter.slt
+++ b/datafusion/sqllogictest/test_files/push_down_filter.slt
@@ -265,7 +265,7 @@ physical_plan DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/
 query TT
 explain select a from t where CAST(a AS string) = '0123';
 ----
-physical_plan DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter/t.parquet]]},
 projection=[a], file_type=parquet, predicate=CAST(a@0 AS Utf8) = 0123
+physical_plan DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/push_down_filter/t.parquet]]},
 projection=[a], file_type=parquet, predicate=CAST(a@0 AS Utf8View) = 0123
 
 
 statement ok
diff --git a/datafusion/sqllogictest/test_files/scalar.slt 
b/datafusion/sqllogictest/test_files/scalar.slt
index f583d659fd..ca0b472de9 100644
--- a/datafusion/sqllogictest/test_files/scalar.slt
+++ b/datafusion/sqllogictest/test_files/scalar.slt
@@ -1832,7 +1832,7 @@ query TT
 EXPLAIN SELECT letter, letter = LEFT('APACHE', 1) FROM simple_string;
 ----
 logical_plan
-01)Projection: simple_string.letter, simple_string.letter = Utf8("A") AS 
simple_string.letter = left(Utf8("APACHE"),Int64(1))
+01)Projection: simple_string.letter, simple_string.letter = Utf8View("A") AS 
simple_string.letter = left(Utf8("APACHE"),Int64(1))
 02)--TableScan: simple_string projection=[letter]
 physical_plan
 01)ProjectionExec: expr=[letter@0 as letter, letter@0 = A as 
simple_string.letter = left(Utf8("APACHE"),Int64(1))]
@@ -1851,10 +1851,10 @@ query TT
 EXPLAIN SELECT letter, letter = LEFT(letter2, 1) FROM simple_string;
 ----
 logical_plan
-01)Projection: simple_string.letter, simple_string.letter = 
left(simple_string.letter2, Int64(1))
+01)Projection: simple_string.letter, simple_string.letter = 
CAST(left(simple_string.letter2, Int64(1)) AS Utf8View)
 02)--TableScan: simple_string projection=[letter, letter2]
 physical_plan
-01)ProjectionExec: expr=[letter@0 as letter, letter@0 = left(letter2@1, 1) as 
simple_string.letter = left(simple_string.letter2,Int64(1))]
+01)ProjectionExec: expr=[letter@0 as letter, letter@0 = CAST(left(letter2@1, 
1) AS Utf8View) as simple_string.letter = left(simple_string.letter2,Int64(1))]
 02)--DataSourceExec: partitions=1, partition_sizes=[1]
 
 query TB
diff --git a/datafusion/sqllogictest/test_files/simplify_expr.slt 
b/datafusion/sqllogictest/test_files/simplify_expr.slt
index 075ccafcfd..c77163dc99 100644
--- a/datafusion/sqllogictest/test_files/simplify_expr.slt
+++ b/datafusion/sqllogictest/test_files/simplify_expr.slt
@@ -35,22 +35,22 @@ query TT
 explain select b from t where b ~ '.*'
 ----
 logical_plan
-01)Filter: t.b IS NOT NULL
+01)Filter: t.b ~ Utf8View(".*")
 02)--TableScan: t projection=[b]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=8192
-02)--FilterExec: b@0 IS NOT NULL
+02)--FilterExec: b@0 ~ .*
 03)----DataSourceExec: partitions=1, partition_sizes=[1]
 
 query TT
 explain select b from t where b !~ '.*'
 ----
 logical_plan
-01)Filter: t.b = Utf8("")
+01)Filter: t.b !~ Utf8View(".*")
 02)--TableScan: t projection=[b]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=8192
-02)--FilterExec: b@0 = 
+02)--FilterExec: b@0 !~ .*
 03)----DataSourceExec: partitions=1, partition_sizes=[1]
 
 query T
diff --git a/datafusion/sqllogictest/test_files/union.slt 
b/datafusion/sqllogictest/test_files/union.slt
index d549f555f9..f901a4d373 100644
--- a/datafusion/sqllogictest/test_files/union.slt
+++ b/datafusion/sqllogictest/test_files/union.slt
@@ -230,7 +230,7 @@ logical_plan
 02)--Union
 03)----TableScan: t1 projection=[name]
 04)----TableScan: t2 projection=[name]
-05)----Projection: t2.name || Utf8("_new") AS name
+05)----Projection: t2.name || Utf8View("_new") AS name
 06)------TableScan: t2 projection=[name]
 physical_plan
 01)AggregateExec: mode=FinalPartitioned, gby=[name@0 as name], aggr=[]
@@ -266,7 +266,7 @@ logical_plan
 01)Union
 02)--TableScan: t1 projection=[name]
 03)--TableScan: t2 projection=[name]
-04)--Projection: t2.name || Utf8("_new") AS name
+04)--Projection: t2.name || Utf8View("_new") AS name
 05)----TableScan: t2 projection=[name]
 physical_plan
 01)UnionExec
diff --git a/docs/source/user-guide/configs.md 
b/docs/source/user-guide/configs.md
index 42282e39e4..5c80cbd563 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -131,7 +131,7 @@ Environment variables are read during `SessionConfig` 
initialisation so they mus
 | datafusion.sql_parser.enable_options_value_normalization                | 
false                     | When set to true, SQL parser will normalize options 
value (convert value to lowercase). Note that this option is ignored and will 
be removed in the future. All case-insensitive values are normalized 
automatically.                                                                  
                                                                                
                              [...]
 | datafusion.sql_parser.dialect                                           | 
generic                   | Configure the SQL dialect used by DataFusion's 
parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, 
Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.  
                                                                                
                                                                                
                          [...]
 | datafusion.sql_parser.support_varchar_with_length                       | 
true                      | If true, permit lengths for `VARCHAR` such as 
`VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a 
length is specified. The Arrow type system does not have a notion of maximum 
string length and thus DataFusion can not enforce such limits.                  
                                                                                
                              [...]
-| datafusion.sql_parser.map_varchar_to_utf8view                           | 
true                      | If true, `VARCHAR` is mapped to `Utf8View` during 
SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. 
Default is false.                                                               
                                                                                
                                                                                
                        [...]
+| datafusion.sql_parser.map_string_types_to_utf8view                      | 
true                      | If true, string types (VARCHAR, CHAR, Text, and 
String) are mapped to `Utf8View` during SQL planning. If false, they are mapped 
to `Utf8`. Default is true.                                                     
                                                                                
                                                                                
                     [...]
 | datafusion.sql_parser.collect_spans                                     | 
false                     | When set to true, the source locations relative to 
the original SQL query (i.e. 
[`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html))
 will be collected and recorded in the logical plan nodes.                      
                                                                                
                                                                     [...]
 | datafusion.sql_parser.recursion_limit                                   | 50 
                       | Specifies the recursion depth limit when parsing 
complex SQL Queries                                                             
                                                                                
                                                                                
                                                                                
                    [...]
 | datafusion.format.safe                                                  | 
true                      | If set to `true` any formatting errors will be 
written to the output instead of being converted into a [`std::fmt::Error`]     
                                                                                
                                                                                
                                                                                
                      [...]


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to