(datafusion-sqlparser-rs) branch main updated: Improve accuracy of supports_string_literal_concatenation_with_newline (#2348)

github-bot Thu, 28 May 2026 06:50:20 -0700

This is an automated email from the ASF dual-hosted git repository.

github-merge-queue[bot] pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git



The following commit(s) were added to refs/heads/main by this push:
     new 0d638963 Improve accuracy of supports_string_literal_concatenation_with_newline (#2348)
0d638963 is described below

commit 0d638963fdcc1af73c2edeed2ae3a75b217bfbc4
Author: Yoav Cohen <[email protected]>
AuthorDate: Thu May 28 15:35:01 2026 +0200

    Improve accuracy of supports_string_literal_concatenation_with_newline (#2348)
---
 src/ast/comments.rs          |  4 ++--
 src/ast/mod.rs               |  3 ++-
 src/tokenizer.rs             | 31 +++++++++++++++----------------
 tests/sqlparser_comments.rs  |  4 ++--
 tests/sqlparser_common.rs    |  8 ++++++++
 tests/sqlparser_oracle.rs    |  7 ++-----
 tests/sqlparser_snowflake.rs |  6 ++++--
 7 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/src/ast/comments.rs b/src/ast/comments.rs
index d48e4f5b..a0c25ad1 100644
--- a/src/ast/comments.rs
+++ b/src/ast/comments.rs
@@ -71,7 +71,7 @@ impl Comments {
     /// // all comments appearing before line seven, i.e. before the first statement itself
     /// assert_eq!(
     ///    &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::<Vec<_>>(),
-    ///    &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]);
+    ///    &["\n header comment ...\n ... spanning multiple lines\n", " first statement"]);
     ///
     /// // all comments appearing within the first statement
     /// assert_eq!(
@@ -81,7 +81,7 @@ impl Comments {
     /// // all comments appearing within or after the first statement
     /// assert_eq!(
     ///    &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::<Vec<_>>(),
-    ///    &[" world ", " second statement\n", " trailing comment\n"]);
+    ///    &[" world ", " second statement", " trailing comment"]);
     /// ```
     ///
     /// The [Spanned](crate::ast::Spanned) trait allows you to access location
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index d737cdb3..e494553c 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -12085,7 +12085,8 @@ impl fmt::Display for OptimizerHint {
                 f.write_str(prefix)?;
                 f.write_str(&self.prefix)?;
                 f.write_str("+")?;
-                f.write_str(&self.text)
+                f.write_str(&self.text)?;
+                f.write_str("\n")
             }
             OptimizerHintStyle::MultiLine => {
                 f.write_str("/*")?;
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index d9f131f8..4c3668f8 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -521,7 +521,7 @@ impl fmt::Display for Whitespace {
             Whitespace::Space => f.write_str(" "),
             Whitespace::Newline => f.write_str("\n"),
             Whitespace::Tab => f.write_str("\t"),
-            Whitespace::SingleLineComment { prefix, comment } => write!(f, "{prefix}{comment}"),
+            Whitespace::SingleLineComment { prefix, comment } => writeln!(f, "{prefix}{comment}"),
             Whitespace::MultiLineComment(s) => write!(f, "/*{s}*/"),
         }
     }
@@ -2037,18 +2037,11 @@ impl<'a> Tokenizer<'a> {
 
     // Consume characters until newline
     fn tokenize_single_line_comment(&self, chars: &mut State) -> String {
-        let mut comment = peeking_take_while(chars, |ch| match ch {
+        peeking_take_while(chars, |ch| match ch {
             '\n' => false,                                           // Always stop at \n
             '\r' if dialect_of!(self is PostgreSqlDialect) => false, // Stop at \r for Postgres
             _ => true, // Keep consuming for other characters
-        });
-
-        if let Some(ch) = chars.next() {
-            assert!(ch == '\n' || ch == '\r');
-            comment.push(ch);
-        }
-
-        comment
+        })
     }
 
     /// Tokenize an identifier or keyword, after the first char is already consumed.
@@ -3346,8 +3339,9 @@ mod tests {
                     Token::Number("0".to_string(), false),
                     Token::Whitespace(Whitespace::SingleLineComment {
                         prefix: "--".to_string(),
-                        comment: "this is a comment\n".to_string(),
+                        comment: "this is a comment".to_string(),
                     }),
+                    Token::Whitespace(Whitespace::Newline),
                     Token::Number("1".to_string(), false),
                 ],
             ),
@@ -3367,8 +3361,9 @@ mod tests {
                     Token::Number("0".to_string(), false),
                     Token::Whitespace(Whitespace::SingleLineComment {
                         prefix: "--".to_string(),
-                        comment: "this is a comment\r\n".to_string(),
+                        comment: "this is a comment\r".to_string(),
                     }),
+                    Token::Whitespace(Whitespace::Newline),
                     Token::Number("1".to_string(), false),
                 ],
             ),
@@ -3392,8 +3387,9 @@ mod tests {
             Token::Number("1".to_string(), false),
             Token::Whitespace(Whitespace::SingleLineComment {
                 prefix: "--".to_string(),
-                comment: "\r".to_string(),
+                comment: "".to_string(),
             }),
+            Token::Whitespace(Whitespace::Newline), // Postgres treats \r as newline in single-line comments
             Token::Number("0".to_string(), false),
         ];
         compare(expected, tokens);
@@ -4220,16 +4216,19 @@ mod tests {
             vec![
                 Token::Whitespace(Whitespace::SingleLineComment {
                     prefix: "--".to_string(),
-                    comment: "\n".to_string(),
+                    comment: "".to_string(),
                 }),
+                Token::Whitespace(Whitespace::Newline),
                 Token::Whitespace(Whitespace::SingleLineComment {
                     prefix: "--".to_string(),
-                    comment: " Table structure for table...\n".to_string(),
+                    comment: " Table structure for table...".to_string(),
                 }),
+                Token::Whitespace(Whitespace::Newline),
                 Token::Whitespace(Whitespace::SingleLineComment {
                     prefix: "--".to_string(),
-                    comment: "\n".to_string(),
+                    comment: "".to_string(),
                 }),
+                Token::Whitespace(Whitespace::Newline),
             ],
         );
     }
diff --git a/tests/sqlparser_comments.rs b/tests/sqlparser_comments.rs
index 34442ca3..8b416438 100644
--- a/tests/sqlparser_comments.rs
+++ b/tests/sqlparser_comments.rs
@@ -50,10 +50,10 @@ more*/
         vec![
             CommentWithSpan {
                 comment: Comment::SingleLine {
-                    content: " second line comment\n".into(),
+                    content: " second line comment".into(),
                     prefix: "--".into()
                 },
-                span: Span::new((2, 1).into(), (3, 1).into()),
+                span: Span::new((2, 1).into(), (2, 23).into()),
             },
             CommentWithSpan {
                 comment: Comment::MultiLine(" inline comment after `from` ".into()),
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index f470b93c..c2d298d0 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -18357,6 +18357,14 @@ fn parse_adjacent_string_literal_concatenation() {
         'd'
     )"#;
     dialects.one_statement_parses_to(sql, "SELECT 'abc' IN ('abc', 'd')");
+
+    let sql = r#"
+    SELECT 'abc' in ('a'
+        'b' -- COMMENT
+        'c',
+        'd'
+    )"#;
+    dialects.one_statement_parses_to(sql, "SELECT 'abc' IN ('abc', 'd')");
 }
 
 #[test]
diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs
index 35f08311..888778e2 100644
--- a/tests/sqlparser_oracle.rs
+++ b/tests/sqlparser_oracle.rs
@@ -364,10 +364,7 @@ fn test_optimizer_hints() {
         "SELECT --+ one two three /* asdf */\n 1 FROM dual",
     );
     assert_eq!(select.optimizer_hints.len(), 1);
-    assert_eq!(
-        select.optimizer_hints[0].text,
-        " one two three /* asdf */\n"
-    );
+    assert_eq!(select.optimizer_hints[0].text, " one two three /* asdf */");
     assert_eq!(select.optimizer_hints[0].prefix, "");
 
     // inserts
@@ -396,7 +393,7 @@ fn test_optimizer_hints() {
     );
     assert_eq!(select.optimizer_hints.len(), 1);
     assert_eq!(select.optimizer_hints[0].prefix, "abc");
-    assert_eq!(select.optimizer_hints[0].text, " text\n");
+    assert_eq!(select.optimizer_hints[0].text, " text");
 }
 
 #[test]
diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs
index 8cd6a3fc..0000b0a3 100644
--- a/tests/sqlparser_snowflake.rs
+++ b/tests/sqlparser_snowflake.rs
@@ -596,8 +596,9 @@ fn test_snowflake_single_line_tokenize() {
         Token::make_keyword("TABLE"),
         Token::Whitespace(Whitespace::SingleLineComment {
             prefix: "#".to_string(),
-            comment: " this is a comment \n".to_string(),
+            comment: " this is a comment ".to_string(),
         }),
+        Token::Whitespace(Whitespace::Newline),
         Token::make_word("table_1", None),
     ];
 
@@ -613,8 +614,9 @@ fn test_snowflake_single_line_tokenize() {
         Token::Whitespace(Whitespace::Space),
         Token::Whitespace(Whitespace::SingleLineComment {
             prefix: "//".to_string(),
-            comment: " this is a comment \n".to_string(),
+            comment: " this is a comment ".to_string(),
         }),
+        Token::Whitespace(Whitespace::Newline),
         Token::make_word("table_1", None),
     ];
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion-sqlparser-rs) branch main updated: Improve accuracy of supports_string_literal_concatenation_with_newline (#2348)

Reply via email to