mvzink commented on code in PR #1747:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/1747#discussion_r2023188341


##########
src/parser/mod.rs:
##########
@@ -7081,18 +7029,243 @@ impl<'a> Parser<'a> {
 
             if let Token::Word(word) = self.peek_token().token {
                 if word.keyword == Keyword::OPTIONS {
-                    options = Some(self.parse_options(Keyword::OPTIONS)?);
+                    table_options =
+                        
CreateTableOptions::Options(self.parse_options(Keyword::OPTIONS)?)
                 }
             };
         }
 
+        if !dialect_of!(self is HiveDialect) && table_options == 
CreateTableOptions::None {
+            let plain_options = self.parse_plain_options()?;
+            if !plain_options.is_empty() {
+                table_options = CreateTableOptions::Plain(plain_options)
+            }
+        };
+
         Ok(CreateTableConfiguration {
             partition_by,
             cluster_by,
-            options,
+            table_options,
         })
     }
 
+    fn parse_plain_option(&mut self) -> Result<Option<SqlOption>, ParserError> 
{
+        // Single parameter option
+        if self.parse_keywords(&[Keyword::START, Keyword::TRANSACTION]) {
+            return Ok(Some(SqlOption::Ident(Ident::new("START TRANSACTION"))));
+        }
+
+        // Custom option
+        if self.parse_keywords(&[Keyword::COMMENT]) {
+            let has_eq = self.consume_token(&Token::Eq);
+            let value = self.next_token();
+
+            let comment = match (has_eq, value.token) {
+                (true, Token::SingleQuotedString(s)) => {
+                    Ok(Some(SqlOption::Comment(CommentDef::WithEq(s))))
+                }
+                (false, Token::SingleQuotedString(s)) => {
+                    Ok(Some(SqlOption::Comment(CommentDef::WithoutEq(s))))
+                }
+                (_, token) => {
+                    self.expected("Token::SingleQuotedString", 
TokenWithSpan::wrap(token))
+                }
+            };
+            return comment;
+        }
+
+        if self.parse_keywords(&[Keyword::ENGINE]) {
+            let _ = self.consume_token(&Token::Eq);
+            let value = self.next_token();
+
+            let engine = match value.token {
+                Token::Word(w) => {
+                    let parameters = if self.peek_token() == Token::LParen {
+                        Some(self.parse_parenthesized_identifiers()?)
+                    } else {
+                        None
+                    };
+
+                    Ok(Some(SqlOption::TableEngine(TableEngine {
+                        name: w.value,
+                        parameters,
+                    })))
+                }
+                _ => {
+                    return self.expected("Token::Word", value)?;
+                }
+            };
+
+            return engine;
+        }
+
+        if self.parse_keywords(&[Keyword::TABLESPACE]) {
+            let _ = self.consume_token(&Token::Eq);
+            let value = self.next_token();
+
+            let tablespace = match value.token {
+                // TABLESPACE tablespace_name [STORAGE DISK] | [TABLESPACE 
tablespace_name] STORAGE MEMORY
+                Token::Word(Word { value: name, .. }) | 
Token::SingleQuotedString(name) => {
+                    let storage = match self.parse_keyword(Keyword::STORAGE) {
+                        true => {
+                            let _ = self.consume_token(&Token::Eq);
+                            let storage_token = self.next_token();
+                            match &storage_token.token {
+                                Token::Word(w) => match 
w.value.to_uppercase().as_str() {
+                                    "DISK" => Some(StorageType::Disk),
+                                    "MEMORY" => Some(StorageType::Memory),
+                                    _ => self
+                                        .expected("Storage type (DISK or 
MEMORY)", storage_token)?,
+                                },
+                                _ => self.expected("Token::Word", 
storage_token)?,
+                            }
+                        }
+                        false => None,
+                    };
+
+                    Ok(Some(SqlOption::TableSpace(TablespaceOption {
+                        name,
+                        storage,
+                    })))
+                }
+                _ => {
+                    return self.expected("Token::Word", value)?;
+                }
+            };
+
+            return tablespace;
+        }
+
+        if self.parse_keyword(Keyword::UNION) {
+            let _ = self.consume_token(&Token::Eq);
+            let value = self.next_token();
+
+            match value.token {
+                // UNION [=] (tbl_name[,tbl_name]...)
+                Token::LParen => {
+                    let tables: Vec<Ident> =
+                        self.parse_comma_separated0(Parser::parse_identifier, 
Token::RParen)?;
+                    self.expect_token(&Token::RParen)?;
+
+                    return Ok(Some(SqlOption::Union(tables)));
+                }
+                _ => {
+                    return self.expected("Token::LParen", value)?;
+                }
+            }
+        }
+
+        // Key/Value parameter option
+        let key = if self.parse_keywords(&[Keyword::DEFAULT, 
Keyword::CHARSET]) {
+            // [DEFAULT] CHARACTER SET [=] charset_name
+            Ident::new("DEFAULT CHARSET")
+        } else if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARACTER, 
Keyword::SET]) {
+            // [DEFAULT] CHARACTER SET [=] charset_name
+            Ident::new("DEFAULT CHARACTER SET")

Review Comment:
   Not for this PR IMO, but I'd be curious to hear your thoughts & @iffyio's: 
would it be appropriate to add another `SqlOption` variant like `TableSpace` 
and `TableEngine` for a few of these options to make inspecting them easier for 
users? I'm specifically thinking of `[DEFAULT] {CHARSET | CHARACTER SET}` 
because as it is if I wanted to extract charset info from DDL, I would have to 
check for 4 different hardcoded strings which aren't represented in the public 
API (i.e. there's no `DEFAULT_CHARACTER_SET_IDENT` or a set of idents I could 
expect to see). Does the fact that it has 4 possible representations make it 
worth special casing with a `SqlOption::CharSet { default, shorthand, value }`? 
Since I will in fact need to inspect character set info, I may post a PR for 
that.
   
   For this PR though, to at least make it harder to miss one of those 4 
variants, can we put all the charset related branches next to each other?



##########
src/parser/mod.rs:
##########
@@ -7081,18 +7029,243 @@ impl<'a> Parser<'a> {
 
             if let Token::Word(word) = self.peek_token().token {
                 if word.keyword == Keyword::OPTIONS {
-                    options = Some(self.parse_options(Keyword::OPTIONS)?);
+                    table_options =
+                        
CreateTableOptions::Options(self.parse_options(Keyword::OPTIONS)?)
                 }
             };
         }
 
+        if !dialect_of!(self is HiveDialect) && table_options == 
CreateTableOptions::None {
+            let plain_options = self.parse_plain_options()?;
+            if !plain_options.is_empty() {
+                table_options = CreateTableOptions::Plain(plain_options)
+            }
+        };
+
         Ok(CreateTableConfiguration {
             partition_by,
             cluster_by,
-            options,
+            table_options,
         })
     }
 
+    fn parse_plain_option(&mut self) -> Result<Option<SqlOption>, ParserError> 
{
+        // Single parameter option
+        if self.parse_keywords(&[Keyword::START, Keyword::TRANSACTION]) {
+            return Ok(Some(SqlOption::Ident(Ident::new("START TRANSACTION"))));
+        }
+
+        // Custom option
+        if self.parse_keywords(&[Keyword::COMMENT]) {
+            let has_eq = self.consume_token(&Token::Eq);
+            let value = self.next_token();
+
+            let comment = match (has_eq, value.token) {
+                (true, Token::SingleQuotedString(s)) => {
+                    Ok(Some(SqlOption::Comment(CommentDef::WithEq(s))))
+                }
+                (false, Token::SingleQuotedString(s)) => {
+                    Ok(Some(SqlOption::Comment(CommentDef::WithoutEq(s))))
+                }
+                (_, token) => {
+                    self.expected("Token::SingleQuotedString", 
TokenWithSpan::wrap(token))
+                }
+            };
+            return comment;
+        }
+
+        if self.parse_keywords(&[Keyword::ENGINE]) {
+            let _ = self.consume_token(&Token::Eq);
+            let value = self.next_token();
+
+            let engine = match value.token {
+                Token::Word(w) => {
+                    let parameters = if self.peek_token() == Token::LParen {
+                        Some(self.parse_parenthesized_identifiers()?)
+                    } else {
+                        None
+                    };
+
+                    Ok(Some(SqlOption::TableEngine(TableEngine {
+                        name: w.value,
+                        parameters,
+                    })))
+                }
+                _ => {
+                    return self.expected("Token::Word", value)?;
+                }
+            };
+
+            return engine;
+        }
+
+        if self.parse_keywords(&[Keyword::TABLESPACE]) {
+            let _ = self.consume_token(&Token::Eq);
+            let value = self.next_token();
+
+            let tablespace = match value.token {
+                // TABLESPACE tablespace_name [STORAGE DISK] | [TABLESPACE 
tablespace_name] STORAGE MEMORY
+                Token::Word(Word { value: name, .. }) | 
Token::SingleQuotedString(name) => {
+                    let storage = match self.parse_keyword(Keyword::STORAGE) {
+                        true => {
+                            let _ = self.consume_token(&Token::Eq);
+                            let storage_token = self.next_token();
+                            match &storage_token.token {
+                                Token::Word(w) => match 
w.value.to_uppercase().as_str() {
+                                    "DISK" => Some(StorageType::Disk),
+                                    "MEMORY" => Some(StorageType::Memory),
+                                    _ => self
+                                        .expected("Storage type (DISK or 
MEMORY)", storage_token)?,
+                                },
+                                _ => self.expected("Token::Word", 
storage_token)?,
+                            }
+                        }
+                        false => None,
+                    };
+
+                    Ok(Some(SqlOption::TableSpace(TablespaceOption {
+                        name,
+                        storage,
+                    })))
+                }
+                _ => {
+                    return self.expected("Token::Word", value)?;
+                }
+            };
+
+            return tablespace;
+        }
+
+        if self.parse_keyword(Keyword::UNION) {
+            let _ = self.consume_token(&Token::Eq);
+            let value = self.next_token();
+
+            match value.token {
+                // UNION [=] (tbl_name[,tbl_name]...)
+                Token::LParen => {
+                    let tables: Vec<Ident> =
+                        self.parse_comma_separated0(Parser::parse_identifier, 
Token::RParen)?;
+                    self.expect_token(&Token::RParen)?;
+
+                    return Ok(Some(SqlOption::Union(tables)));
+                }
+                _ => {
+                    return self.expected("Token::LParen", value)?;
+                }
+            }
+        }
+
+        // Key/Value parameter option
+        let key = if self.parse_keywords(&[Keyword::DEFAULT, 
Keyword::CHARSET]) {
+            // [DEFAULT] CHARACTER SET [=] charset_name
+            Ident::new("DEFAULT CHARSET")
+        } else if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARACTER, 
Keyword::SET]) {
+            // [DEFAULT] CHARACTER SET [=] charset_name
+            Ident::new("DEFAULT CHARACTER SET")
+        } else if self.parse_keywords(&[Keyword::DEFAULT, Keyword::COLLATE]) {
+            // [DEFAULT] COLLATE [=] collation_name
+            Ident::new("DEFAULT COLLATE")
+        } else if self.parse_keywords(&[Keyword::DATA, Keyword::DIRECTORY]) {
+            // {DATA | INDEX} DIRECTORY [=] 'absolute path to directory'
+            Ident::new("DATA DIRECTORY")
+        } else if self.parse_keywords(&[Keyword::INDEX, Keyword::DIRECTORY]) {
+            // {DATA | INDEX} DIRECTORY [=] 'absolute path to directory'
+            Ident::new("INDEX DIRECTORY")
+        } else if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) {
+            // [DEFAULT] CHARACTER SET [=] charset_name
+            Ident::new("CHARACTER SET")
+        } else if self.parse_keyword(Keyword::CHARSET) {
+            // [DEFAULT] CHARACTER SET [=] charset_name
+            Ident::new("CHARSET")
+        } else if self.parse_keyword(Keyword::COLLATE) {
+            // [DEFAULT] CHARACTER SET [=] charset_name
+            Ident::new("COLLATE")

Review Comment:
   These comments are wrong.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to