iffyio commented on code in PR #2216:
URL:
https://github.com/apache/datafusion-sqlparser-rs/pull/2216#discussion_r2833593678
##########
src/parser/mod.rs:
##########
@@ -14168,53 +14168,75 @@ impl<'a> Parser<'a> {
})
}
- /// Parses an optional optimizer hint at the current token position
+ /// Parses optimizer hints at the current token position.
+ ///
+ /// Collects all `/*prefix+...*/` and `--prefix+...` patterns.
+ /// The `prefix` is any run of ASCII alphanumeric characters between the
+ /// comment marker and `+` (e.g. `""` for `/*+...*/`, `"abc"` for
`/*abc+...*/`).
///
///
[MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html#optimizer-hints-overview)
///
[Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E)
- fn maybe_parse_optimizer_hint(&mut self) -> Result<Option<OptimizerHint>,
ParserError> {
+ fn maybe_parse_optimizer_hints(&mut self) -> Result<Vec<OptimizerHint>,
ParserError> {
let supports_hints = self.dialect.supports_comment_optimizer_hint();
if !supports_hints {
- return Ok(None);
+ return Ok(vec![]);
}
+ let mut hints = vec![];
loop {
let t = self.peek_nth_token_no_skip_ref(0);
match &t.token {
- Token::Whitespace(ws) => {
- match ws {
- Whitespace::SingleLineComment { comment, .. }
- | Whitespace::MultiLineComment(comment) => {
- return Ok(match comment.strip_prefix("+") {
- None => None,
- Some(text) => {
- let hint = OptimizerHint {
- text: text.into(),
- style: if let
Whitespace::SingleLineComment {
- prefix, ..
- } = ws
- {
- OptimizerHintStyle::SingleLine {
- prefix: prefix.clone(),
- }
- } else {
- OptimizerHintStyle::MultiLine
- },
- };
- // Consume the comment token
- self.next_token_no_skip();
- Some(hint)
- }
+ Token::Whitespace(ws) => match ws {
Review Comment:
Since the outer `match` now handles only one variant, it looks like we can
simplify the flow and remove one layer of indentation, e.g.:
```rust
let Token::Whitespace(ws) = &t.token else {
break
};
match ws {
}
```
##########
tests/sqlparser_mysql.rs:
##########
@@ -4641,6 +4641,25 @@ fn test_optimizer_hints() {
"\
DELETE /*+ foobar */ FROM table_name",
);
+
+ // ~ prefixed hints: any alphanumeric prefix before `+` is captured
Review Comment:
```suggestion
// prefixed hints: any alphanumeric prefix before `+` is captured
```
##########
tests/sqlparser_mysql.rs:
##########
@@ -4641,6 +4641,25 @@ fn test_optimizer_hints() {
"\
DELETE /*+ foobar */ FROM table_name",
);
+
+ // ~ prefixed hints: any alphanumeric prefix before `+` is captured
+ let select = mysql_dialect.verified_only_select("SELECT /*abc+ text */ 1");
+ assert_eq!(select.optimizer_hints.len(), 1);
+ assert_eq!(select.optimizer_hints[0].prefix, "abc");
+ assert_eq!(select.optimizer_hints[0].text, " text ");
+
+ // ~ multiple hints with different prefixes
+ let select = mysql_dialect.verified_only_select("SELECT /*+ A */ /*x2+ B
*/ 1");
Review Comment:
Can we add a test case like the following, mixing hints with regular comments?
`SELECT /*+ A */ /* Regular comment */ /*x2+ B */ 1`
##########
tests/sqlparser_mysql.rs:
##########
@@ -4641,6 +4641,25 @@ fn test_optimizer_hints() {
"\
DELETE /*+ foobar */ FROM table_name",
);
+
+ // ~ prefixed hints: any alphanumeric prefix before `+` is captured
+ let select = mysql_dialect.verified_only_select("SELECT /*abc+ text */ 1");
+ assert_eq!(select.optimizer_hints.len(), 1);
+ assert_eq!(select.optimizer_hints[0].prefix, "abc");
+ assert_eq!(select.optimizer_hints[0].text, " text ");
+
+ // ~ multiple hints with different prefixes
+ let select = mysql_dialect.verified_only_select("SELECT /*+ A */ /*x2+ B
*/ 1");
+ assert_eq!(select.optimizer_hints.len(), 2);
+ assert_eq!(select.optimizer_hints[0].prefix, "");
+ assert_eq!(select.optimizer_hints[0].text, " A ");
+ assert_eq!(select.optimizer_hints[1].prefix, "x2");
+ assert_eq!(select.optimizer_hints[1].text, " B ");
+
+ // ~ prefixed hints in INSERT/UPDATE/DELETE
Review Comment:
```suggestion
// prefixed hints in INSERT/UPDATE/DELETE
```
##########
src/parser/mod.rs:
##########
@@ -14168,53 +14168,75 @@ impl<'a> Parser<'a> {
})
}
- /// Parses an optional optimizer hint at the current token position
+ /// Parses optimizer hints at the current token position.
+ ///
+ /// Collects all `/*prefix+...*/` and `--prefix+...` patterns.
+ /// The `prefix` is any run of ASCII alphanumeric characters between the
+ /// comment marker and `+` (e.g. `""` for `/*+...*/`, `"abc"` for
`/*abc+...*/`).
///
///
[MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html#optimizer-hints-overview)
///
[Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E)
- fn maybe_parse_optimizer_hint(&mut self) -> Result<Option<OptimizerHint>,
ParserError> {
+ fn maybe_parse_optimizer_hints(&mut self) -> Result<Vec<OptimizerHint>,
ParserError> {
let supports_hints = self.dialect.supports_comment_optimizer_hint();
if !supports_hints {
- return Ok(None);
+ return Ok(vec![]);
}
+ let mut hints = vec![];
loop {
let t = self.peek_nth_token_no_skip_ref(0);
match &t.token {
- Token::Whitespace(ws) => {
- match ws {
- Whitespace::SingleLineComment { comment, .. }
- | Whitespace::MultiLineComment(comment) => {
- return Ok(match comment.strip_prefix("+") {
- None => None,
- Some(text) => {
- let hint = OptimizerHint {
- text: text.into(),
- style: if let
Whitespace::SingleLineComment {
- prefix, ..
- } = ws
- {
- OptimizerHintStyle::SingleLine {
- prefix: prefix.clone(),
- }
- } else {
- OptimizerHintStyle::MultiLine
- },
- };
- // Consume the comment token
- self.next_token_no_skip();
- Some(hint)
- }
+ Token::Whitespace(ws) => match ws {
+ Whitespace::SingleLineComment { comment, prefix } => {
+ if let Some((hint_prefix, text)) =
+ Self::extract_hint_prefix_and_text(comment)
+ {
+ hints.push(OptimizerHint {
+ prefix: hint_prefix,
+ text,
+ style: OptimizerHintStyle::SingleLine {
+ prefix: prefix.clone(),
+ },
});
+ self.next_token_no_skip();
+ } else {
+ break;
}
- Whitespace::Space | Whitespace::Tab |
Whitespace::Newline => {
- // Consume the token and try with the next
whitespace or comment
+ }
+ Whitespace::MultiLineComment(comment) => {
+ if let Some((hint_prefix, text)) =
+ Self::extract_hint_prefix_and_text(comment)
+ {
+ hints.push(OptimizerHint {
+ prefix: hint_prefix,
+ text,
+ style: OptimizerHintStyle::MultiLine,
+ });
self.next_token_no_skip();
+ } else {
+ break;
}
}
- }
- _ => return Ok(None),
+ Whitespace::Space | Whitespace::Tab | Whitespace::Newline
=> {
+ self.next_token_no_skip();
+ }
+ },
+ _ => break,
}
}
+ Ok(hints)
+ }
+
+ /// Checks if a comment's content starts with `[ASCII-alphanumeric]*+`
+ /// and returns `(prefix, text_after_plus)` if so.
+ fn extract_hint_prefix_and_text(comment: &str) -> Option<(String, String)>
{
+ let plus_pos = comment.find('+')?;
+ let before_plus = &comment[..plus_pos];
+ if before_plus.chars().all(|c| c.is_ascii_alphanumeric()) {
+ let text = &comment[plus_pos + 1..];
+ Some((before_plus.to_string(), text.to_string()))
+ } else {
+ None
+ }
Review Comment:
@altmannmarcelo could we use the proposed `split_once` to simplify the
function body?
##########
tests/sqlparser_oracle.rs:
##########
@@ -338,36 +338,32 @@ fn parse_national_quote_delimited_string_but_is_a_word() {
fn test_optimizer_hints() {
let oracle_dialect = oracle();
- // selects
+ // ~ selects: all `/*+...*/` comments are collected as hints
let select = oracle_dialect.verified_only_select_with_canonical(
"SELECT /*+one two three*/ /*+not a hint!*/ 1 FROM dual",
- "SELECT /*+one two three*/ 1 FROM dual",
- );
- assert_eq!(
- select
- .optimizer_hint
- .as_ref()
- .map(|hint| hint.text.as_str()),
- Some("one two three")
+ "SELECT /*+one two three*/ /*+not a hint!*/ 1 FROM dual",
);
+ assert_eq!(select.optimizer_hints.len(), 2);
+ assert_eq!(select.optimizer_hints[0].text, "one two three");
+ assert_eq!(select.optimizer_hints[0].prefix, "");
+ assert_eq!(select.optimizer_hints[1].text, "not a hint!");
Review Comment:
@altmannmarcelo maybe we can change the `not a hint!` value to avoid
confusion in the test case going forward?
##########
tests/sqlparser_mysql.rs:
##########
@@ -4641,6 +4641,25 @@ fn test_optimizer_hints() {
"\
DELETE /*+ foobar */ FROM table_name",
);
+
+ // ~ prefixed hints: any alphanumeric prefix before `+` is captured
+ let select = mysql_dialect.verified_only_select("SELECT /*abc+ text */ 1");
+ assert_eq!(select.optimizer_hints.len(), 1);
+ assert_eq!(select.optimizer_hints[0].prefix, "abc");
+ assert_eq!(select.optimizer_hints[0].text, " text ");
+
+ // ~ multiple hints with different prefixes
Review Comment:
```suggestion
// multiple hints with different prefixes
```
##########
tests/sqlparser_oracle.rs:
##########
@@ -387,6 +383,15 @@ fn test_optimizer_hints() {
(pt.person_id, pt.first_name, pt.last_name, pt.title) \
VALUES (ps.person_id, ps.first_name, ps.last_name, ps.title)",
);
+
+ // ~ single-line prefixed hint (Oracle supports `--` without trailing
whitespace)
Review Comment:
```suggestion
// single-line prefixed hint (Oracle supports `--` without trailing whitespace)
```
##########
tests/sqlparser_oracle.rs:
##########
@@ -338,36 +338,32 @@ fn parse_national_quote_delimited_string_but_is_a_word() {
fn test_optimizer_hints() {
let oracle_dialect = oracle();
- // selects
+ // ~ selects: all `/*+...*/` comments are collected as hints
Review Comment:
```suggestion
// selects: all `/*+...*/` comments are collected as hints
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]