This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new bd071be1dc feat: add `custom_string_literal_override` to unparser 
Dialect trait (#20590)
bd071be1dc is described below

commit bd071be1dcc0b7e50ed224e8bf83e15470f20f8d
Author: Jax Liu <[email protected]>
AuthorDate: Tue Mar 17 04:44:46 2026 +0800

    feat: add `custom_string_literal_override` to unparser Dialect trait 
(#20590)
    
    ## Which issue does this PR close?
    
    - Closes #.
    
    ## Rationale for this change
    
    When unparsing queries targeting databases like MSSQL, non-ASCII string
    literals need special handling. MSSQL requires the `N'...'` (national
    string literal) prefix for strings containing Unicode characters.
    Currently the unparser always emits single-quoted strings with no way
    for dialects to customize this behavior.
    
    ## What changes are included in this PR?
    
    - Add a new `custom_string_literal_override` method to the `Dialect`
    trait with a default implementation returning `None` (no override).
    - Consolidate the `Utf8`, `Utf8View`, and `LargeUtf8` match arms in
    `scalar_value_to_sql` and route them through the new dialect hook.
    
    ## Are these changes tested?
    
    Yes. A test-only `MsSqlDialect` is defined in the test module to verify:
    - ASCII strings produce standard single-quoted literals (no `N` prefix)
    - Non-ASCII strings produce national string literals (`N'...`')
    - The default dialect is unaffected (no `N` prefix regardless of
    content)
    
    It's used by Wren AI in production for a while:
    https://github.com/Canner/datafusion/pull/8
    
    ## Are there any user-facing changes?
    
    Yes. The `Dialect` trait gains a new method
    `custom_string_literal_override`. This is a non-breaking change since
    the method has a default implementation. Dialect implementors can
    override it to customize string literal unparsing.
---
 datafusion/sql/src/unparser/dialect.rs | 11 +++++
 datafusion/sql/src/unparser/expr.rs    | 84 +++++++++++++++++++++++++++++-----
 2 files changed, 84 insertions(+), 11 deletions(-)

diff --git a/datafusion/sql/src/unparser/dialect.rs 
b/datafusion/sql/src/unparser/dialect.rs
index 31d2662cc4..fe278a0e1e 100644
--- a/datafusion/sql/src/unparser/dialect.rs
+++ b/datafusion/sql/src/unparser/dialect.rs
@@ -248,6 +248,17 @@ pub trait Dialect: Send + Sync {
     fn supports_empty_select_list(&self) -> bool {
         false
     }
+
+    /// Override the default string literal unparsing.
+    ///
+    /// Returns `Some(ast::Expr)` to replace the default single-quoted string,
+    /// or `None` to use the default behavior.
+    ///
+    /// For example, MSSQL requires non-ASCII strings to use national string
+    /// literal syntax (`N'datafusion資料融合'`).
+    fn string_literal_to_sql(&self, _s: &str) -> Option<ast::Expr> {
+        None
+    }
 }
 
 /// `IntervalStyle` to use for unparsing
diff --git a/datafusion/sql/src/unparser/expr.rs 
b/datafusion/sql/src/unparser/expr.rs
index 503048bb3c..54c8eeb125 100644
--- a/datafusion/sql/src/unparser/expr.rs
+++ b/datafusion/sql/src/unparser/expr.rs
@@ -1294,18 +1294,17 @@ impl Unparser<'_> {
                 Ok(ast::Expr::value(ast::Value::Number(ui.to_string(), false)))
             }
             ScalarValue::UInt64(None) => 
Ok(ast::Expr::value(ast::Value::Null)),
-            ScalarValue::Utf8(Some(str)) => {
-                Ok(ast::Expr::value(SingleQuotedString(str.to_string())))
-            }
-            ScalarValue::Utf8(None) => Ok(ast::Expr::value(ast::Value::Null)),
-            ScalarValue::Utf8View(Some(str)) => {
-                Ok(ast::Expr::value(SingleQuotedString(str.to_string())))
-            }
-            ScalarValue::Utf8View(None) => 
Ok(ast::Expr::value(ast::Value::Null)),
-            ScalarValue::LargeUtf8(Some(str)) => {
+            ScalarValue::Utf8(Some(str))
+            | ScalarValue::Utf8View(Some(str))
+            | ScalarValue::LargeUtf8(Some(str)) => {
+                if let Some(expr) = self.dialect.string_literal_to_sql(str) {
+                    return Ok(expr);
+                }
                 Ok(ast::Expr::value(SingleQuotedString(str.to_string())))
             }
-            ScalarValue::LargeUtf8(None) => 
Ok(ast::Expr::value(ast::Value::Null)),
+            ScalarValue::Utf8(None)
+            | ScalarValue::Utf8View(None)
+            | ScalarValue::LargeUtf8(None) => 
Ok(ast::Expr::value(ast::Value::Null)),
             ScalarValue::Binary(Some(_)) => not_impl_err!("Unsupported scalar: 
{v:?}"),
             ScalarValue::Binary(None) => 
Ok(ast::Expr::value(ast::Value::Null)),
             ScalarValue::BinaryView(Some(_)) => {
@@ -2397,7 +2396,6 @@ mod tests {
 
         let expected = r#"('a' > 4)"#;
         assert_eq!(actual, expected);
-
         Ok(())
     }
 
@@ -2960,6 +2958,70 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn test_mssql_dialect_national_literal() -> Result<()> {
+        struct MsSqlDialect;
+
+        impl Dialect for MsSqlDialect {
+            fn identifier_quote_style(&self, _identifier: &str) -> 
Option<char> {
+                Some('[')
+            }
+
+            fn string_literal_to_sql(&self, s: &str) -> Option<ast::Expr> {
+                if !s.is_ascii() {
+                    Some(ast::Expr::value(ast::Value::NationalStringLiteral(
+                        s.to_string(),
+                    )))
+                } else {
+                    None
+                }
+            }
+        }
+
+        let dialect = MsSqlDialect;
+        let unparser = Unparser::new(&dialect);
+
+        // Get nation string literal for the custom mssql dialect
+        for (s, expected) in [
+            ("national string", "'national string'"),
+            ("datafusion資料融合", "N'datafusion資料融合'"),
+        ] {
+            let expr = Expr::Literal(ScalarValue::Utf8(Some(s.to_string())), 
None);
+            let ast = unparser.expr_to_sql(&expr)?;
+            assert_eq!(ast.to_string(), expected);
+
+            let expr = 
Expr::Literal(ScalarValue::Utf8View(Some(s.to_string())), None);
+            let ast = unparser.expr_to_sql(&expr)?;
+            assert_eq!(ast.to_string(), expected);
+
+            let expr = 
Expr::Literal(ScalarValue::LargeUtf8(Some(s.to_string())), None);
+            let ast = unparser.expr_to_sql(&expr)?;
+            assert_eq!(ast.to_string(), expected);
+        }
+
+        let dialect = DefaultDialect {};
+        let unparser = Unparser::new(&dialect);
+
+        // Get normal string literal for default dialect
+        for (s, expected) in [
+            ("national string", "'national string'"),
+            ("datafusion資料融合", "'datafusion資料融合'"),
+        ] {
+            let expr = Expr::Literal(ScalarValue::Utf8(Some(s.to_string())), 
None);
+            let ast = unparser.expr_to_sql(&expr)?;
+            assert_eq!(ast.to_string(), expected);
+
+            let expr = 
Expr::Literal(ScalarValue::Utf8View(Some(s.to_string())), None);
+            let ast = unparser.expr_to_sql(&expr)?;
+            assert_eq!(ast.to_string(), expected);
+
+            let expr = 
Expr::Literal(ScalarValue::LargeUtf8(Some(s.to_string())), None);
+            let ast = unparser.expr_to_sql(&expr)?;
+            assert_eq!(ast.to_string(), expected);
+        }
+        Ok(())
+    }
+
     #[test]
     fn test_cast_value_to_dict_expr() {
         let tests = [(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to