This is an automated email from the ASF dual-hosted git repository.

iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 443f492b Require space after -- to start single line comment in MySQL (#1705)
443f492b is described below

commit 443f492b4b9443d45266c77b073b50b91bcb95ed
Author: Hans Ott <[email protected]>
AuthorDate: Wed Feb 5 20:47:17 2025 +0100

    Require space after -- to start single line comment in MySQL (#1705)
---
 src/dialect/mod.rs       |   9 ++++
 src/dialect/mysql.rs     |   4 ++
 src/tokenizer.rs         | 105 ++++++++++++++++++++++++++++++++++++++++++++---
 tests/sqlparser_mysql.rs |  15 +++++++
 4 files changed, 127 insertions(+), 6 deletions(-)

diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index 205395f6..965e6c77 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -881,6 +881,15 @@ pub trait Dialect: Debug + Any {
     fn supports_table_hints(&self) -> bool {
         false
     }
+
+    /// Returns true if this dialect requires a whitespace character after `--` to start a single line comment.
+    ///
+    /// MySQL: <https://dev.mysql.com/doc/refman/8.4/en/ansi-diff-comments.html>
+    /// e.g. UPDATE account SET balance=balance--1
+    //       WHERE account_id=5752             ^^^ will be interpreted as two minus signs instead of a comment
+    fn requires_single_line_comment_whitespace(&self) -> bool {
+        false
+    }
 }
 
 /// This represents the operators for which precedence must be defined
diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs
index a67fe67b..55b91ad2 100644
--- a/src/dialect/mysql.rs
+++ b/src/dialect/mysql.rs
@@ -125,6 +125,10 @@ impl Dialect for MySqlDialect {
     fn supports_table_hints(&self) -> bool {
         true
     }
+
+    fn requires_single_line_comment_whitespace(&self) -> bool {
+        true
+    }
 }
 
 /// `LOCK TABLES`
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 7742e8fa..d4e530c9 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1229,14 +1229,26 @@ impl<'a> Tokenizer<'a> {
                 // operators
                 '-' => {
                     chars.next(); // consume the '-'
+
                     match chars.peek() {
                         Some('-') => {
-                            chars.next(); // consume the second '-', starting a single-line comment
-                            let comment = self.tokenize_single_line_comment(chars);
-                            Ok(Some(Token::Whitespace(Whitespace::SingleLineComment {
-                                prefix: "--".to_owned(),
-                                comment,
-                            })))
+                            let mut is_comment = true;
+                            if self.dialect.requires_single_line_comment_whitespace() {
+                                is_comment = Some(' ') == chars.peekable.clone().nth(1);
+                            }
+
+                            if is_comment {
+                                chars.next(); // consume second '-'
+                                let comment = self.tokenize_single_line_comment(chars);
+                                return Ok(Some(Token::Whitespace(
+                                    Whitespace::SingleLineComment {
+                                        prefix: "--".to_owned(),
+                                        comment,
+                                    },
+                                )));
+                            }
+
+                            self.start_binop(chars, "-", Token::Minus)
                         }
                         Some('>') => {
                             chars.next();
@@ -3685,4 +3697,85 @@ mod tests {
             ],
         );
     }
+
+    #[test]
+    fn test_whitespace_required_after_single_line_comment() {
+        all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace())
+            .tokenizes_to(
+                "SELECT --'abc'",
+                vec![
+                    Token::make_keyword("SELECT"),
+                    Token::Whitespace(Whitespace::Space),
+                    Token::Minus,
+                    Token::Minus,
+                    Token::SingleQuotedString("abc".to_string()),
+                ],
+            );
+
+        all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace())
+            .tokenizes_to(
+                "SELECT -- 'abc'",
+                vec![
+                    Token::make_keyword("SELECT"),
+                    Token::Whitespace(Whitespace::Space),
+                    Token::Whitespace(Whitespace::SingleLineComment {
+                        prefix: "--".to_string(),
+                        comment: " 'abc'".to_string(),
+                    }),
+                ],
+            );
+
+        all_dialects_where(|dialect| dialect.requires_single_line_comment_whitespace())
+            .tokenizes_to(
+                "SELECT --",
+                vec![
+                    Token::make_keyword("SELECT"),
+                    Token::Whitespace(Whitespace::Space),
+                    Token::Minus,
+                    Token::Minus,
+                ],
+            );
+    }
+
+    #[test]
+    fn test_whitespace_not_required_after_single_line_comment() {
+        all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace())
+            .tokenizes_to(
+                "SELECT --'abc'",
+                vec![
+                    Token::make_keyword("SELECT"),
+                    Token::Whitespace(Whitespace::Space),
+                    Token::Whitespace(Whitespace::SingleLineComment {
+                        prefix: "--".to_string(),
+                        comment: "'abc'".to_string(),
+                    }),
+                ],
+            );
+
+        all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace())
+            .tokenizes_to(
+                "SELECT -- 'abc'",
+                vec![
+                    Token::make_keyword("SELECT"),
+                    Token::Whitespace(Whitespace::Space),
+                    Token::Whitespace(Whitespace::SingleLineComment {
+                        prefix: "--".to_string(),
+                        comment: " 'abc'".to_string(),
+                    }),
+                ],
+            );
+
+        all_dialects_where(|dialect| !dialect.requires_single_line_comment_whitespace())
+            .tokenizes_to(
+                "SELECT --",
+                vec![
+                    Token::make_keyword("SELECT"),
+                    Token::Whitespace(Whitespace::Space),
+                    Token::Whitespace(Whitespace::SingleLineComment {
+                        prefix: "--".to_string(),
+                        comment: "".to_string(),
+                    }),
+                ],
+            );
+    }
 }
diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs
index 9f00a921..6bf9076d 100644
--- a/tests/sqlparser_mysql.rs
+++ b/tests/sqlparser_mysql.rs
@@ -3250,3 +3250,18 @@ fn parse_double_precision() {
         "CREATE TABLE foo (bar DOUBLE(11,0))",
     );
 }
+
+#[test]
+fn parse_looks_like_single_line_comment() {
+    mysql().one_statement_parses_to(
+        "UPDATE account SET balance=balance--1 WHERE account_id=5752",
+        "UPDATE account SET balance = balance - -1 WHERE account_id = 5752",
+    );
+    mysql().one_statement_parses_to(
+        r#"
+            UPDATE account SET balance=balance-- 1
+            WHERE account_id=5752
+        "#,
+        "UPDATE account SET balance = balance WHERE account_id = 5752",
+    );
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to