iffyio commented on code in PR #2172:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/2172#discussion_r2741860920


##########
src/parser/mod.rs:
##########
@@ -14032,6 +14050,79 @@ impl<'a> Parser<'a> {
         })
     }
 
+    /// Parses SELECT modifiers and DISTINCT/ALL in any order. Allows 
HIGH_PRIORITY, STRAIGHT_JOIN,
+    /// SQL_SMALL_RESULT, SQL_BIG_RESULT, SQL_BUFFER_RESULT, SQL_NO_CACHE, 
SQL_CALC_FOUND_ROWS and
+    /// DISTINCT/DISTINCTROW/ALL to appear in any order.

Review Comment:
   ```suggestion
       /// Parses `SELECT` modifiers and `DISTINCT/ALL` in any order. 
   ```
   Thinking we can avoid enumerating the variants so that they don't go out of 
sync with the impl



##########
src/parser/mod.rs:
##########
@@ -14032,6 +14050,79 @@ impl<'a> Parser<'a> {
         })
     }
 
+    /// Parses SELECT modifiers and DISTINCT/ALL in any order. Allows 
HIGH_PRIORITY, STRAIGHT_JOIN,
+    /// SQL_SMALL_RESULT, SQL_BIG_RESULT, SQL_BUFFER_RESULT, SQL_NO_CACHE, 
SQL_CALC_FOUND_ROWS and
+    /// DISTINCT/DISTINCTROW/ALL to appear in any order.
+    fn parse_select_modifiers(
+        &mut self,
+    ) -> Result<(SelectModifiers, Option<Distinct>), ParserError> {
+        let mut modifiers = SelectModifiers::default();
+        let mut distinct: Option<Distinct> = None;
+        let mut has_all = false;
+
+        let keywords = &[
+            Keyword::ALL,
+            Keyword::DISTINCT,
+            Keyword::DISTINCTROW,
+            Keyword::HIGH_PRIORITY,
+            Keyword::STRAIGHT_JOIN,
+            Keyword::SQL_SMALL_RESULT,
+            Keyword::SQL_BIG_RESULT,
+            Keyword::SQL_BUFFER_RESULT,
+            Keyword::SQL_NO_CACHE,
+            Keyword::SQL_CALC_FOUND_ROWS,
+        ];
+
+        while let Some(keyword) = self.parse_one_of_keywords(keywords) {
+            match keyword {
+                Keyword::ALL => {
+                    if has_all {
+                        self.prev_token();
+                        return self.expected("SELECT without duplicate ALL", 
self.peek_token());
+                    }
+                    if distinct.is_some() {
+                        self.prev_token();
+                        return self.expected("DISTINCT alone without ALL", 
self.peek_token());
+                    }
+                    has_all = true;
+                }
+                Keyword::DISTINCT | Keyword::DISTINCTROW => {
+                    if distinct.is_some() {
+                        self.prev_token();
+                        return self.expected(
+                            "SELECT without duplicate DISTINCT or DISTINCTROW",
+                            self.peek_token(),
+                        );
+                    }
+                    if has_all {
+                        self.prev_token();
+                        return self.expected(
+                            "ALL alone without DISTINCT or DISTINCTROW",
+                            self.peek_token(),
+                        );
+                    }
+                    distinct = Some(Distinct::Distinct);
+                }
+                Keyword::HIGH_PRIORITY => modifiers.high_priority = true,

Review Comment:
   ```suggestion
                   Keyword::HIGH_PRIORITY if !modifiers.high_priority => 
modifiers.high_priority = true,
   ```
   similar for the others is something like this needed if it's invalid that 
they occur multiple times (can we add test cases for the behavior)?



##########
src/ast/query.rs:
##########
@@ -345,6 +409,10 @@ pub struct Select {
     pub select_token: AttachedToken,
     /// `SELECT [DISTINCT] ...`
     pub distinct: Option<Distinct>,
+    /// MySQL-specific SELECT modifiers.
+    ///
+    /// See [MySQL 
SELECT](https://dev.mysql.com/doc/refman/8.4/en/select.html).
+    pub select_modifiers: SelectModifiers,

Review Comment:
   ```suggestion
       pub select_modifiers: Option<SelectModifiers>,
   ```



##########
src/ast/query.rs:
##########
@@ -334,6 +334,70 @@ pub enum SelectFlavor {
     FromFirstNoSelect,
 }
 
+/// MySQL-specific SELECT modifiers that appear after the SELECT keyword.
+///
+/// These modifiers affect query execution and optimization. They can appear
+/// in any order after SELECT and before the column list, and can be
+/// interleaved with DISTINCT/DISTINCTROW/ALL:
+///
+/// ```sql
+/// SELECT
+///     [ALL | DISTINCT | DISTINCTROW]
+///     [HIGH_PRIORITY]
+///     [STRAIGHT_JOIN]
+///     [SQL_SMALL_RESULT] [SQL_BIG_RESULT] [SQL_BUFFER_RESULT]
+///     [SQL_NO_CACHE] [SQL_CALC_FOUND_ROWS]
+///     select_expr [, select_expr] ...
+/// ```
+///
+/// See [MySQL SELECT](https://dev.mysql.com/doc/refman/8.4/en/select.html).
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Default)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub struct SelectModifiers {
+    /// `HIGH_PRIORITY` gives the SELECT higher priority than statements that 
update a table.

Review Comment:
   could we include the mysql links individually on the fields as well? that 
way if other dialects add their own variants in the future the doc would be 
consistent



##########
src/parser/mod.rs:
##########
@@ -4906,14 +4906,17 @@ impl<'a> Parser<'a> {
     /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns 
[`None`] if `ALL` is parsed
     /// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are 
found.
     pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, 
ParserError> {
-        let loc = self.peek_token().span.start;
         let all = self.parse_keyword(Keyword::ALL);
         let distinct = self.parse_keyword(Keyword::DISTINCT);
         if !distinct {
             return Ok(None);
         }
         if all {
-            return parser_err!("Cannot specify both ALL and 
DISTINCT".to_string(), loc);
+            self.prev_token();
+            return self.expected(
+                "ALL alone without DISTINCT or DISTINCTROW",

Review Comment:
   this diff seems incorrect to mention `DISTINCTROW`?



##########
src/parser/mod.rs:
##########
@@ -14032,6 +14050,79 @@ impl<'a> Parser<'a> {
         })
     }
 
+    /// Parses SELECT modifiers and DISTINCT/ALL in any order. Allows 
HIGH_PRIORITY, STRAIGHT_JOIN,
+    /// SQL_SMALL_RESULT, SQL_BIG_RESULT, SQL_BUFFER_RESULT, SQL_NO_CACHE, 
SQL_CALC_FOUND_ROWS and
+    /// DISTINCT/DISTINCTROW/ALL to appear in any order.
+    fn parse_select_modifiers(
+        &mut self,
+    ) -> Result<(SelectModifiers, Option<Distinct>), ParserError> {
+        let mut modifiers = SelectModifiers::default();
+        let mut distinct: Option<Distinct> = None;
+        let mut has_all = false;

Review Comment:
   Can we instead introduce a proper enum to represent the three states? the 
current impl I think is harder to follow which combinations are valid and 
if/when the specified option is ignored in the output.



##########
tests/sqlparser_common.rs:
##########
@@ -1040,18 +1041,18 @@ fn parse_outer_join_operator() {
 #[test]
 fn parse_select_distinct_on() {
     let sql = "SELECT DISTINCT ON (album_id) name FROM track ORDER BY 
album_id, milliseconds";
-    let select = verified_only_select(sql);
+    let select = all_dialects_but_mysql().verified_only_select(sql);

Review Comment:
   ```suggestion
       let select = all_dialects_except(|d| 
d.is::<MySqlDialect>()).verified_only_select(sql);
   ```
   thinking instead of introducing a dedicated function we can reuse this 
helper?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to