eyalleshem commented on code in PR #2075:
URL:
https://github.com/apache/datafusion-sqlparser-rs/pull/2075#discussion_r2577731642
##########
src/tokenizer.rs:
##########
@@ -1783,96 +1786,115 @@ impl<'a> Tokenizer<'a> {
}
/// Tokenize dollar preceded value (i.e: a string/placeholder)
- fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result<Token, TokenizerError> {
- let mut s = String::new();
- let mut value = String::new();
+ fn tokenize_dollar_preceded_value(
+ &self,
+ chars: &mut State<'a>,
+ ) -> Result<Token, TokenizerError> {
+ chars.next(); // consume first $
- chars.next();
+ // Case 1: $$text$$ (untagged dollar-quoted string)
+ if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() {
+ let (value, tag) = self.tokenize_dollar_quoted_string_borrowed(chars, None)?;
+ return Ok(Token::DollarQuotedString(DollarQuotedString {
+ value: value.into_owned(),
+ tag: tag.map(|t| t.into_owned()),
+ }));
+ }
- // If the dialect does not support dollar-quoted strings, then `$$` is rather a placeholder.
+ // If it's not $$ we have 2 options :
+ // Case 2: $tag$text$tag$ (tagged dollar-quoted string) if dialect supports it
+ // Case 3: $placeholder (e.g., $1, $name)
+ let tag_start = chars.byte_pos;
+ let _tag_slice = peeking_take_while_ref(chars, |ch| {
+ ch.is_alphanumeric()
+ || ch == '_'
+ || matches!(ch, '$' if self.dialect.supports_dollar_placeholder())
+ });
+ let tag_end = chars.byte_pos;
+
+ // Case 2: $tag$text$tag$ (tagged dollar-quoted string)
if matches!(chars.peek(), Some('$')) && !self.dialect.supports_dollar_placeholder() {
- chars.next();
+ let tag_value = &chars.source[tag_start..tag_end];
+ let (value, tag) =
+ self.tokenize_dollar_quoted_string_borrowed(chars, Some(tag_value))?;
+ return Ok(Token::DollarQuotedString(DollarQuotedString {
+ value: value.into_owned(),
+ tag: tag.map(|t| t.into_owned()),
+ }));
+ }
- let mut is_terminated = false;
- let mut prev: Option<char> = None;
+ // Case 3: $placeholder (e.g., $1, $name)
+ let tag_value = &chars.source[tag_start..tag_end];
+ Ok(Token::Placeholder(format!("${}", tag_value)))
+ }
- while let Some(&ch) = chars.peek() {
- if prev == Some('$') {
- if ch == '$' {
- chars.next();
- is_terminated = true;
- break;
- } else {
- s.push('$');
- s.push(ch);
+ /// Tokenize a dollar-quoted string ($$text$$ or $tag$text$tag$), returning borrowed slices.
+ /// tag_prefix: None for $$, Some("tag") for $tag$
+ /// Returns (value: Cow<'a, str>, tag: Option<Cow<'a, str>>)
+ fn tokenize_dollar_quoted_string_borrowed(
+ &self,
+ chars: &mut State<'a>,
+ tag_prefix: Option<&'a str>,
+ ) -> Result<(Cow<'a, str>, Option<Cow<'a, str>>), TokenizerError> {
+ chars.next(); // consume $ after tag (or second $ for $$)
Review Comment:
Done
##########
src/tokenizer.rs:
##########
@@ -1783,96 +1786,115 @@ impl<'a> Tokenizer<'a> {
}
/// Tokenize dollar preceded value (i.e: a string/placeholder)
- fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result<Token, TokenizerError> {
- let mut s = String::new();
- let mut value = String::new();
+ fn tokenize_dollar_preceded_value(
+ &self,
+ chars: &mut State<'a>,
+ ) -> Result<Token, TokenizerError> {
+ chars.next(); // consume first $
Review Comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]