This is an automated email from the ASF dual-hosted git repository.
iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 0cd49fb6 Start new line if \r in Postgres dialect (#1647)
0cd49fb6 is described below
commit 0cd49fb6999f7945d5e64a2c93f34f4c25a4a962
Author: Hans Ott <[email protected]>
AuthorDate: Tue Jan 7 18:35:03 2025 +0100
Start new line if \r in Postgres dialect (#1647)
---
src/tokenizer.rs | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 57 insertions(+), 6 deletions(-)
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 38bd33d6..b517ed66 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1621,11 +1621,17 @@ impl<'a> Tokenizer<'a> {
// Consume characters until newline
fn tokenize_single_line_comment(&self, chars: &mut State) -> String {
- let mut comment = peeking_take_while(chars, |ch| ch != '\n');
+ let mut comment = peeking_take_while(chars, |ch| match ch {
+ '\n' => false, // Always stop at \n
+ '\r' if dialect_of!(self is PostgreSqlDialect) => false, // Stop at \r for Postgres
+ _ => true, // Keep consuming for other characters
+ });
+
if let Some(ch) = chars.next() {
- assert_eq!(ch, '\n');
+ assert!(ch == '\n' || ch == '\r');
comment.push(ch);
}
+
comment
}
@@ -2677,17 +2683,62 @@ mod tests {
#[test]
fn tokenize_comment() {
- let sql = String::from("0--this is a comment\n1");
+ let test_cases = vec![
+ (
+ String::from("0--this is a comment\n1"),
+ vec![
+ Token::Number("0".to_string(), false),
+ Token::Whitespace(Whitespace::SingleLineComment {
+ prefix: "--".to_string(),
+ comment: "this is a comment\n".to_string(),
+ }),
+ Token::Number("1".to_string(), false),
+ ],
+ ),
+ (
+ String::from("0--this is a comment\r1"),
+ vec![
+ Token::Number("0".to_string(), false),
+ Token::Whitespace(Whitespace::SingleLineComment {
+ prefix: "--".to_string(),
+ comment: "this is a comment\r1".to_string(),
+ }),
+ ],
+ ),
+ (
+ String::from("0--this is a comment\r\n1"),
+ vec![
+ Token::Number("0".to_string(), false),
+ Token::Whitespace(Whitespace::SingleLineComment {
+ prefix: "--".to_string(),
+ comment: "this is a comment\r\n".to_string(),
+ }),
+ Token::Number("1".to_string(), false),
+ ],
+ ),
+ ];
let dialect = GenericDialect {};
+
+ for (sql, expected) in test_cases {
+ let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
+ compare(expected, tokens);
+ }
+ }
+
+ #[test]
+ fn tokenize_comment_postgres() {
+ let sql = String::from("1--\r0");
+
+ let dialect = PostgreSqlDialect {};
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
let expected = vec![
- Token::Number("0".to_string(), false),
+ Token::Number("1".to_string(), false),
Token::Whitespace(Whitespace::SingleLineComment {
prefix: "--".to_string(),
- comment: "this is a comment\n".to_string(),
+ comment: "\r".to_string(),
}),
- Token::Number("1".to_string(), false),
+ Token::Number("0".to_string(), false),
];
compare(expected, tokens);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]