Re: [PR] Allow custom OptimizerHints [datafusion-sqlparser-rs]

via GitHub Fri, 20 Feb 2026 06:59:52 -0800


iffyio commented on code in PR #2216:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/2216#discussion_r2833593678



##########
src/parser/mod.rs:
##########
@@ -14168,53 +14168,75 @@ impl<'a> Parser<'a> {
         })
     }
 
-    /// Parses an optional optimizer hint at the current token position
+    /// Parses optimizer hints at the current token position.
+    ///
+    /// Collects all `/*prefix+...*/` and `--prefix+...` patterns.
+    /// The `prefix` is any run of ASCII alphanumeric characters between the
+    /// comment marker and `+` (e.g. `""` for `/*+...*/`, `"abc"` for 
`/*abc+...*/`).
     ///
     /// 
[MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html#optimizer-hints-overview)
     /// 
[Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E)
-    fn maybe_parse_optimizer_hint(&mut self) -> Result<Option<OptimizerHint>, 
ParserError> {
+    fn maybe_parse_optimizer_hints(&mut self) -> Result<Vec<OptimizerHint>, 
ParserError> {
         let supports_hints = self.dialect.supports_comment_optimizer_hint();
         if !supports_hints {
-            return Ok(None);
+            return Ok(vec![]);
         }
+        let mut hints = vec![];
         loop {
             let t = self.peek_nth_token_no_skip_ref(0);
             match &t.token {
-                Token::Whitespace(ws) => {
-                    match ws {
-                        Whitespace::SingleLineComment { comment, .. }
-                        | Whitespace::MultiLineComment(comment) => {
-                            return Ok(match comment.strip_prefix("+") {
-                                None => None,
-                                Some(text) => {
-                                    let hint = OptimizerHint {
-                                        text: text.into(),
-                                        style: if let 
Whitespace::SingleLineComment {
-                                            prefix, ..
-                                        } = ws
-                                        {
-                                            OptimizerHintStyle::SingleLine {
-                                                prefix: prefix.clone(),
-                                            }
-                                        } else {
-                                            OptimizerHintStyle::MultiLine
-                                        },
-                                    };
-                                    // Consume the comment token
-                                    self.next_token_no_skip();
-                                    Some(hint)
-                                }
+                Token::Whitespace(ws) => match ws {

Review Comment:
   Since the outer `match` now effectively has only one arm (plus the 
`_ => break` fallback), it looks like we can simplify the flow and remove one 
layer of indentation, e.g.:
   ```rust
   let Token::Whitespace(ws) = &t.token else {
       break
   };
   match ws {
   }
   ```



##########
tests/sqlparser_mysql.rs:
##########
@@ -4641,6 +4641,25 @@ fn test_optimizer_hints() {
         "\
        DELETE /*+ foobar */ FROM table_name",
     );
+
+    // ~ prefixed hints: any alphanumeric prefix before `+` is captured

Review Comment:
   ```suggestion
       // prefixed hints: any alphanumeric prefix before `+` is captured
   ```



##########
tests/sqlparser_mysql.rs:
##########
@@ -4641,6 +4641,25 @@ fn test_optimizer_hints() {
         "\
        DELETE /*+ foobar */ FROM table_name",
     );
+
+    // ~ prefixed hints: any alphanumeric prefix before `+` is captured
+    let select = mysql_dialect.verified_only_select("SELECT /*abc+ text */ 1");
+    assert_eq!(select.optimizer_hints.len(), 1);
+    assert_eq!(select.optimizer_hints[0].prefix, "abc");
+    assert_eq!(select.optimizer_hints[0].text, " text ");
+
+    // ~ multiple hints with different prefixes
+    let select = mysql_dialect.verified_only_select("SELECT /*+ A */ /*x2+ B 
*/ 1");

Review Comment:
   Can we add a test case like the following, which mixes hints with regular comments?
   `SELECT /*+ A */ /* Regular comment */ /*x2+ B */ 1`



##########
tests/sqlparser_mysql.rs:
##########
@@ -4641,6 +4641,25 @@ fn test_optimizer_hints() {
         "\
        DELETE /*+ foobar */ FROM table_name",
     );
+
+    // ~ prefixed hints: any alphanumeric prefix before `+` is captured
+    let select = mysql_dialect.verified_only_select("SELECT /*abc+ text */ 1");
+    assert_eq!(select.optimizer_hints.len(), 1);
+    assert_eq!(select.optimizer_hints[0].prefix, "abc");
+    assert_eq!(select.optimizer_hints[0].text, " text ");
+
+    // ~ multiple hints with different prefixes
+    let select = mysql_dialect.verified_only_select("SELECT /*+ A */ /*x2+ B 
*/ 1");
+    assert_eq!(select.optimizer_hints.len(), 2);
+    assert_eq!(select.optimizer_hints[0].prefix, "");
+    assert_eq!(select.optimizer_hints[0].text, " A ");
+    assert_eq!(select.optimizer_hints[1].prefix, "x2");
+    assert_eq!(select.optimizer_hints[1].text, " B ");
+
+    // ~ prefixed hints in INSERT/UPDATE/DELETE

Review Comment:
   ```suggestion
       // prefixed hints in INSERT/UPDATE/DELETE
   ```



##########
src/parser/mod.rs:
##########
@@ -14168,53 +14168,75 @@ impl<'a> Parser<'a> {
         })
     }
 
-    /// Parses an optional optimizer hint at the current token position
+    /// Parses optimizer hints at the current token position.
+    ///
+    /// Collects all `/*prefix+...*/` and `--prefix+...` patterns.
+    /// The `prefix` is any run of ASCII alphanumeric characters between the
+    /// comment marker and `+` (e.g. `""` for `/*+...*/`, `"abc"` for 
`/*abc+...*/`).
     ///
     /// 
[MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html#optimizer-hints-overview)
     /// 
[Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E)
-    fn maybe_parse_optimizer_hint(&mut self) -> Result<Option<OptimizerHint>, 
ParserError> {
+    fn maybe_parse_optimizer_hints(&mut self) -> Result<Vec<OptimizerHint>, 
ParserError> {
         let supports_hints = self.dialect.supports_comment_optimizer_hint();
         if !supports_hints {
-            return Ok(None);
+            return Ok(vec![]);
         }
+        let mut hints = vec![];
         loop {
             let t = self.peek_nth_token_no_skip_ref(0);
             match &t.token {
-                Token::Whitespace(ws) => {
-                    match ws {
-                        Whitespace::SingleLineComment { comment, .. }
-                        | Whitespace::MultiLineComment(comment) => {
-                            return Ok(match comment.strip_prefix("+") {
-                                None => None,
-                                Some(text) => {
-                                    let hint = OptimizerHint {
-                                        text: text.into(),
-                                        style: if let 
Whitespace::SingleLineComment {
-                                            prefix, ..
-                                        } = ws
-                                        {
-                                            OptimizerHintStyle::SingleLine {
-                                                prefix: prefix.clone(),
-                                            }
-                                        } else {
-                                            OptimizerHintStyle::MultiLine
-                                        },
-                                    };
-                                    // Consume the comment token
-                                    self.next_token_no_skip();
-                                    Some(hint)
-                                }
+                Token::Whitespace(ws) => match ws {
+                    Whitespace::SingleLineComment { comment, prefix } => {
+                        if let Some((hint_prefix, text)) =
+                            Self::extract_hint_prefix_and_text(comment)
+                        {
+                            hints.push(OptimizerHint {
+                                prefix: hint_prefix,
+                                text,
+                                style: OptimizerHintStyle::SingleLine {
+                                    prefix: prefix.clone(),
+                                },
                             });
+                            self.next_token_no_skip();
+                        } else {
+                            break;
                         }
-                        Whitespace::Space | Whitespace::Tab | 
Whitespace::Newline => {
-                            // Consume the token and try with the next 
whitespace or comment
+                    }
+                    Whitespace::MultiLineComment(comment) => {
+                        if let Some((hint_prefix, text)) =
+                            Self::extract_hint_prefix_and_text(comment)
+                        {
+                            hints.push(OptimizerHint {
+                                prefix: hint_prefix,
+                                text,
+                                style: OptimizerHintStyle::MultiLine,
+                            });
                             self.next_token_no_skip();
+                        } else {
+                            break;
                         }
                     }
-                }
-                _ => return Ok(None),
+                    Whitespace::Space | Whitespace::Tab | Whitespace::Newline 
=> {
+                        self.next_token_no_skip();
+                    }
+                },
+                _ => break,
             }
         }
+        Ok(hints)
+    }
+
+    /// Checks if a comment's content starts with `[ASCII-alphanumeric]*+`
+    /// and returns `(prefix, text_after_plus)` if so.
+    fn extract_hint_prefix_and_text(comment: &str) -> Option<(String, String)> 
{
+        let plus_pos = comment.find('+')?;
+        let before_plus = &comment[..plus_pos];
+        if before_plus.chars().all(|c| c.is_ascii_alphanumeric()) {
+            let text = &comment[plus_pos + 1..];
+            Some((before_plus.to_string(), text.to_string()))
+        } else {
+            None
+        }

Review Comment:
   @altmannmarcelo could we use the proposed `split_once` approach to simplify 
the function body?



##########
tests/sqlparser_oracle.rs:
##########
@@ -338,36 +338,32 @@ fn parse_national_quote_delimited_string_but_is_a_word() {
 fn test_optimizer_hints() {
     let oracle_dialect = oracle();
 
-    // selects
+    // ~ selects: all `/*+...*/` comments are collected as hints
     let select = oracle_dialect.verified_only_select_with_canonical(
         "SELECT /*+one two three*/ /*+not a hint!*/ 1 FROM dual",
-        "SELECT /*+one two three*/ 1 FROM dual",
-    );
-    assert_eq!(
-        select
-            .optimizer_hint
-            .as_ref()
-            .map(|hint| hint.text.as_str()),
-        Some("one two three")
+        "SELECT /*+one two three*/ /*+not a hint!*/ 1 FROM dual",
     );
+    assert_eq!(select.optimizer_hints.len(), 2);
+    assert_eq!(select.optimizer_hints[0].text, "one two three");
+    assert_eq!(select.optimizer_hints[0].prefix, "");
+    assert_eq!(select.optimizer_hints[1].text, "not a hint!");

Review Comment:
   @altmannmarcelo since that comment is now collected as a hint, maybe we can 
change the `not a hint!` value to avoid confusion in the test case going forward?



##########
tests/sqlparser_mysql.rs:
##########
@@ -4641,6 +4641,25 @@ fn test_optimizer_hints() {
         "\
        DELETE /*+ foobar */ FROM table_name",
     );
+
+    // ~ prefixed hints: any alphanumeric prefix before `+` is captured
+    let select = mysql_dialect.verified_only_select("SELECT /*abc+ text */ 1");
+    assert_eq!(select.optimizer_hints.len(), 1);
+    assert_eq!(select.optimizer_hints[0].prefix, "abc");
+    assert_eq!(select.optimizer_hints[0].text, " text ");
+
+    // ~ multiple hints with different prefixes

Review Comment:
   ```suggestion
       // multiple hints with different prefixes
   ```



##########
tests/sqlparser_oracle.rs:
##########
@@ -387,6 +383,15 @@ fn test_optimizer_hints() {
                (pt.person_id, pt.first_name, pt.last_name, pt.title) \
                VALUES (ps.person_id, ps.first_name, ps.last_name, ps.title)",
     );
+
+    // ~ single-line prefixed hint (Oracle supports `--` without trailing 
whitespace)

Review Comment:
   ```suggestion
       // single-line prefixed hint (Oracle supports `--` without trailing 
whitespace)
   ```



##########
tests/sqlparser_oracle.rs:
##########
@@ -338,36 +338,32 @@ fn parse_national_quote_delimited_string_but_is_a_word() {
 fn test_optimizer_hints() {
     let oracle_dialect = oracle();
 
-    // selects
+    // ~ selects: all `/*+...*/` comments are collected as hints

Review Comment:
   ```suggestion
       // selects: all `/*+...*/` comments are collected as hints
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Allow custom OptimizerHints [datafusion-sqlparser-rs]

Reply via email to