This is an automated email from the ASF dual-hosted git repository.
iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git
The following commit(s) were added to refs/heads/main by this push:
new c808c4e4 Correctly look for end delimiter dollar quoted string (#1650)
c808c4e4 is described below
commit c808c4e4fdc0131396b5967e489c2c0bcfac9e5b
Author: Hans Ott <[email protected]>
AuthorDate: Sun Jan 12 21:34:09 2025 +0100
Correctly look for end delimiter dollar quoted string (#1650)
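The previous scanner restarted a hand-rolled, character-by-character
delimiter match every time it saw a '$', which could miss an end delimiter
that overlaps an earlier partial match: in "SELECT $abc$x$ab$abc$" the '$'
that ends the false candidate "$ab$" is also the '$' that opens the real
closing "$abc$", so the old loop consumed it and then reported the string
as unterminated. The new approach instead buffers every consumed character
and checks whether the buffer ends with the full "$tag$" end delimiter.

A minimal standalone sketch of that search follows; the function name and
signature are illustrative only, not the tokenizer's internal API:

    /// Returns the body of a tagged dollar-quoted string given its tag and
    /// the input following the opening "$tag$", or None if the closing
    /// "$tag$" never appears (an unterminated string).
    fn find_dollar_quoted_body(tag: &str, rest: &str) -> Option<String> {
        let end_delimiter = format!("${tag}$");
        let mut temp = String::new();
        for ch in rest.chars() {
            temp.push(ch);
            // Checking ends_with() after every character handles candidates
            // that overlap, such as "$ab$" followed by the real "$abc$".
            if temp.ends_with(&end_delimiter) {
                return temp.strip_suffix(&end_delimiter).map(str::to_string);
            }
        }
        // Input ran out before the end delimiter appeared.
        None
    }

    fn main() {
        // Mirrors the new test cases: the body of $abc$x$ab$abc$ is "x$ab",
        // while $abc$abc$ is unterminated (a mirrored tag is not a close).
        assert_eq!(find_dollar_quoted_body("abc", "x$ab$abc$"), Some("x$ab".into()));
        assert_eq!(find_dollar_quoted_body("abc", "abc$"), None);
    }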
---
src/tokenizer.rs | 176 ++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 129 insertions(+), 47 deletions(-)
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 15b13122..5f9c0f98 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1566,46 +1566,33 @@ impl<'a> Tokenizer<'a> {

             if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() {
                 chars.next();
-                'searching_for_end: loop {
-                    s.push_str(&peeking_take_while(chars, |ch| ch != '$'));
-                    match chars.peek() {
-                        Some('$') => {
-                            chars.next();
-                            let mut maybe_s = String::from("$");
-                            for c in value.chars() {
-                                if let Some(next_char) = chars.next() {
-                                    maybe_s.push(next_char);
-                                    if next_char != c {
-                                        // This doesn't match the dollar quote delimiter so this
-                                        // is not the end of the string.
-                                        s.push_str(&maybe_s);
-                                        continue 'searching_for_end;
-                                    }
-                                } else {
-                                    return self.tokenizer_error(
-                                        chars.location(),
-                                        "Unterminated dollar-quoted, expected $",
-                                    );
+                let mut temp = String::new();
+                let end_delimiter = format!("${}$", value);
+
+                loop {
+                    match chars.next() {
+                        Some(ch) => {
+                            temp.push(ch);
+
+                            if temp.ends_with(&end_delimiter) {
+                                if let Some(temp) = temp.strip_suffix(&end_delimiter) {
+                                    s.push_str(temp);
                                 }
-                            }
-                            if chars.peek() == Some(&'$') {
-                                chars.next();
-                                maybe_s.push('$');
-                                // maybe_s matches the end delimiter
-                                break 'searching_for_end;
-                            } else {
-                                // This also doesn't match the dollar quote delimiter as there are
-                                // more characters before the second dollar so this is not the end
-                                // of the string.
-                                s.push_str(&maybe_s);
-                                continue 'searching_for_end;
+                                break;
                             }
                         }
-                        _ => {
+                        None => {
+                            if temp.ends_with(&end_delimiter) {
+                                if let Some(temp) = temp.strip_suffix(&end_delimiter) {
+                                    s.push_str(temp);
+                                }
+                                break;
+                            }
+
                             return self.tokenizer_error(
                                 chars.location(),
                                 "Unterminated dollar-quoted, expected $",
-                            )
+                            );
                         }
                     }
                 }
@@ -2569,20 +2556,67 @@ mod tests {

     #[test]
     fn tokenize_dollar_quoted_string_tagged() {
-        let sql = String::from(
-            "SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$",
-        );
-        let dialect = GenericDialect {};
-        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
-        let expected = vec![
-            Token::make_keyword("SELECT"),
-            Token::Whitespace(Whitespace::Space),
-            Token::DollarQuotedString(DollarQuotedString {
-                value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
-                tag: Some("tag".into()),
-            }),
+        let test_cases = vec![
+            (
+                String::from("SELECT $tag$dollar '$' quoted strings have $tags like this$ or like this $$$tag$"),
+                vec![
+                    Token::make_keyword("SELECT"),
+                    Token::Whitespace(Whitespace::Space),
+                    Token::DollarQuotedString(DollarQuotedString {
+                        value: "dollar '$' quoted strings have $tags like this$ or like this $$".into(),
+                        tag: Some("tag".into()),
+                    })
+                ]
+            ),
+            (
+                String::from("SELECT $abc$x$ab$abc$"),
+                vec![
+                    Token::make_keyword("SELECT"),
+                    Token::Whitespace(Whitespace::Space),
+                    Token::DollarQuotedString(DollarQuotedString {
+                        value: "x$ab".into(),
+                        tag: Some("abc".into()),
+                    })
+                ]
+            ),
+            (
+                String::from("SELECT $abc$$abc$"),
+                vec![
+                    Token::make_keyword("SELECT"),
+                    Token::Whitespace(Whitespace::Space),
+                    Token::DollarQuotedString(DollarQuotedString {
+                        value: "".into(),
+                        tag: Some("abc".into()),
+                    })
+                ]
+            ),
+            (
+                String::from("0$abc$$abc$1"),
+                vec![
+                    Token::Number("0".into(), false),
+                    Token::DollarQuotedString(DollarQuotedString {
+                        value: "".into(),
+                        tag: Some("abc".into()),
+                    }),
+                    Token::Number("1".into(), false),
+                ]
+            ),
+            (
+                String::from("$function$abc$q$data$q$$function$"),
+                vec![
+                    Token::DollarQuotedString(DollarQuotedString {
+                        value: "abc$q$data$q$".into(),
+                        tag: Some("function".into()),
+                    }),
+                ]
+            ),
         ];
-        compare(expected, tokens);
+
+        let dialect = GenericDialect {};
+        for (sql, expected) in test_cases {
+            let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
+            compare(expected, tokens);
+        }
     }

     #[test]
@@ -2601,6 +2635,22 @@ mod tests {
         );
     }

+    #[test]
+    fn tokenize_dollar_quoted_string_tagged_unterminated_mirror() {
+        let sql = String::from("SELECT $abc$abc$");
+        let dialect = GenericDialect {};
+        assert_eq!(
+            Tokenizer::new(&dialect, &sql).tokenize(),
+            Err(TokenizerError {
+                message: "Unterminated dollar-quoted, expected $".into(),
+                location: Location {
+                    line: 1,
+                    column: 17
+                }
+            })
+        );
+    }
+
     #[test]
     fn tokenize_dollar_placeholder() {
         let sql = String::from("SELECT $$, $$ABC$$, $ABC$, $ABC");
@@ -2625,6 +2675,38 @@ mod tests {
         );
     }

+    #[test]
+    fn tokenize_nested_dollar_quoted_strings() {
+        let sql = String::from("SELECT $tag$dollar $nested$ string$tag$");
+        let dialect = GenericDialect {};
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
+        let expected = vec![
+            Token::make_keyword("SELECT"),
+            Token::Whitespace(Whitespace::Space),
+            Token::DollarQuotedString(DollarQuotedString {
+                value: "dollar $nested$ string".into(),
+                tag: Some("tag".into()),
+            }),
+        ];
+        compare(expected, tokens);
+    }
+
+    #[test]
+    fn tokenize_dollar_quoted_string_untagged_empty() {
+        let sql = String::from("SELECT $$$$");
+        let dialect = GenericDialect {};
+        let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
+        let expected = vec![
+            Token::make_keyword("SELECT"),
+            Token::Whitespace(Whitespace::Space),
+            Token::DollarQuotedString(DollarQuotedString {
+                value: "".into(),
+                tag: None,
+            }),
+        ];
+        compare(expected, tokens);
+    }
+
     #[test]
     fn tokenize_dollar_quoted_string_untagged() {
         let sql =
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]