iffyio commented on code in PR #1747:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/1747#discussion_r2062561210


##########
src/parser/mod.rs:
##########
@@ -7081,18 +7029,243 @@ impl<'a> Parser<'a> {
 
             if let Token::Word(word) = self.peek_token().token {
                 if word.keyword == Keyword::OPTIONS {
-                    options = Some(self.parse_options(Keyword::OPTIONS)?);
+                    table_options =
+                        
CreateTableOptions::Options(self.parse_options(Keyword::OPTIONS)?)
                 }
             };
         }
 
+        if !dialect_of!(self is HiveDialect) && table_options == 
CreateTableOptions::None {
+            let plain_options = self.parse_plain_options()?;
+            if !plain_options.is_empty() {
+                table_options = CreateTableOptions::Plain(plain_options)
+            }
+        };
+
         Ok(CreateTableConfiguration {
             partition_by,
             cluster_by,
-            options,
+            table_options,
         })
     }
 
+    fn parse_plain_option(&mut self) -> Result<Option<SqlOption>, ParserError> 
{
+        // Single parameter option
+        if self.parse_keywords(&[Keyword::START, Keyword::TRANSACTION]) {
+            return Ok(Some(SqlOption::Ident(Ident::new("START TRANSACTION"))));
+        }
+
+        // Custom option
+        if self.parse_keywords(&[Keyword::COMMENT]) {
+            let has_eq = self.consume_token(&Token::Eq);
+            let value = self.next_token();
+
+            let comment = match (has_eq, value.token) {
+                (true, Token::SingleQuotedString(s)) => {
+                    Ok(Some(SqlOption::Comment(CommentDef::WithEq(s))))
+                }
+                (false, Token::SingleQuotedString(s)) => {
+                    Ok(Some(SqlOption::Comment(CommentDef::WithoutEq(s))))
+                }
+                (_, token) => {
+                    self.expected("Token::SingleQuotedString", 
TokenWithSpan::wrap(token))
+                }
+            };
+            return comment;
+        }
+
+        if self.parse_keywords(&[Keyword::ENGINE]) {
+            let _ = self.consume_token(&Token::Eq);
+            let value = self.next_token();
+
+            let engine = match value.token {
+                Token::Word(w) => {
+                    let parameters = if self.peek_token() == Token::LParen {
+                        Some(self.parse_parenthesized_identifiers()?)
+                    } else {
+                        None
+                    };
+
+                    Ok(Some(SqlOption::TableEngine(TableEngine {
+                        name: w.value,
+                        parameters,
+                    })))
+                }
+                _ => {
+                    return self.expected("Token::Word", value)?;
+                }
+            };
+
+            return engine;
+        }
+
+        if self.parse_keywords(&[Keyword::TABLESPACE]) {
+            let _ = self.consume_token(&Token::Eq);
+            let value = self.next_token();
+
+            let tablespace = match value.token {
+                // TABLESPACE tablespace_name [STORAGE DISK] | [TABLESPACE 
tablespace_name] STORAGE MEMORY
+                Token::Word(Word { value: name, .. }) | 
Token::SingleQuotedString(name) => {
+                    let storage = match self.parse_keyword(Keyword::STORAGE) {
+                        true => {
+                            let _ = self.consume_token(&Token::Eq);
+                            let storage_token = self.next_token();
+                            match &storage_token.token {
+                                Token::Word(w) => match 
w.value.to_uppercase().as_str() {
+                                    "DISK" => Some(StorageType::Disk),
+                                    "MEMORY" => Some(StorageType::Memory),
+                                    _ => self
+                                        .expected("Storage type (DISK or 
MEMORY)", storage_token)?,
+                                },
+                                _ => self.expected("Token::Word", 
storage_token)?,
+                            }
+                        }
+                        false => None,
+                    };
+
+                    Ok(Some(SqlOption::TableSpace(TablespaceOption {
+                        name,
+                        storage,
+                    })))
+                }
+                _ => {
+                    return self.expected("Token::Word", value)?;
+                }
+            };
+
+            return tablespace;
+        }
+
+        if self.parse_keyword(Keyword::UNION) {
+            let _ = self.consume_token(&Token::Eq);
+            let value = self.next_token();
+
+            match value.token {
+                // UNION [=] (tbl_name[,tbl_name]...)
+                Token::LParen => {
+                    let tables: Vec<Ident> =
+                        self.parse_comma_separated0(Parser::parse_identifier, 
Token::RParen)?;
+                    self.expect_token(&Token::RParen)?;
+
+                    return Ok(Some(SqlOption::Union(tables)));
+                }
+                _ => {
+                    return self.expected("Token::LParen", value)?;
+                }
+            }
+        }
+
+        // Key/Value parameter option
+        let key = if self.parse_keywords(&[Keyword::DEFAULT, 
Keyword::CHARSET]) {
+            // [DEFAULT] CHARACTER SET [=] charset_name
+            Ident::new("DEFAULT CHARSET")
+        } else if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARACTER, 
Keyword::SET]) {
+            // [DEFAULT] CHARACTER SET [=] charset_name
+            Ident::new("DEFAULT CHARACTER SET")
+        } else if self.parse_keywords(&[Keyword::DEFAULT, Keyword::COLLATE]) {
+            // [DEFAULT] COLLATE [=] collation_name
+            Ident::new("DEFAULT COLLATE")
+        } else if self.parse_keywords(&[Keyword::DATA, Keyword::DIRECTORY]) {
+            // {DATA | INDEX} DIRECTORY [=] 'absolute path to directory'
+            Ident::new("DATA DIRECTORY")
+        } else if self.parse_keywords(&[Keyword::INDEX, Keyword::DIRECTORY]) {
+            // {DATA | INDEX} DIRECTORY [=] 'absolute path to directory'
+            Ident::new("INDEX DIRECTORY")
+        } else if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) {
+            // [DEFAULT] CHARACTER SET [=] charset_name
+            Ident::new("CHARACTER SET")
+        } else if self.parse_keyword(Keyword::CHARSET) {
+            // [DEFAULT] CHARACTER SET [=] charset_name
+            Ident::new("CHARSET")
+        } else if self.parse_keyword(Keyword::COLLATE) {
+            // [DEFAULT] CHARACTER SET [=] charset_name
+            Ident::new("COLLATE")
+        } else if self.parse_keyword(Keyword::KEY_BLOCK_SIZE) {
+            // KEY_BLOCK_SIZE [=] value
+            Ident::new("KEY_BLOCK_SIZE")
+        } else if self.parse_keyword(Keyword::ROW_FORMAT) {
+            // ROW_FORMAT [=] {DEFAULT | DYNAMIC | FIXED | COMPRESSED | 
REDUNDANT | COMPACT}
+            Ident::new("ROW_FORMAT")
+        } else if self.parse_keyword(Keyword::PACK_KEYS) {
+            // PACK_KEYS [=] {0 | 1 | DEFAULT}
+            Ident::new("PACK_KEYS")
+        } else if self.parse_keyword(Keyword::STATS_AUTO_RECALC) {
+            // STATS_AUTO_RECALC [=] {DEFAULT | 0 | 1}
+            Ident::new("STATS_AUTO_RECALC")
+        } else if self.parse_keyword(Keyword::STATS_PERSISTENT) {
+            //STATS_PERSISTENT [=] {DEFAULT | 0 | 1}
+            Ident::new("STATS_PERSISTENT")
+        } else if self.parse_keyword(Keyword::STATS_SAMPLE_PAGES) {
+            // STATS_SAMPLE_PAGES [=] value
+            Ident::new("STATS_SAMPLE_PAGES")
+        } else if self.parse_keyword(Keyword::DELAY_KEY_WRITE) {
+            // DELAY_KEY_WRITE [=] {0 | 1}
+            Ident::new("DELAY_KEY_WRITE")
+        } else if self.parse_keyword(Keyword::COMPRESSION) {
+            // COMPRESSION [=] {'ZLIB' | 'LZ4' | 'NONE'}
+            Ident::new("COMPRESSION")
+        } else if self.parse_keyword(Keyword::ENCRYPTION) {
+            // ENCRYPTION [=] {'Y' | 'N'}
+            Ident::new("ENCRYPTION")
+        } else if self.parse_keyword(Keyword::MAX_ROWS) {
+            // MAX_ROWS [=] value
+            Ident::new("MAX_ROWS")
+        } else if self.parse_keyword(Keyword::MIN_ROWS) {
+            // MIN_ROWS [=] value
+            Ident::new("MIN_ROWS")
+        } else if self.parse_keyword(Keyword::AUTOEXTEND_SIZE) {
+            // AUTOEXTEND_SIZE [=] value
+            Ident::new("AUTOEXTEND_SIZE")
+        } else if self.parse_keyword(Keyword::AVG_ROW_LENGTH) {
+            // AVG_ROW_LENGTH [=] value
+            Ident::new("AVG_ROW_LENGTH")
+        } else if self.parse_keyword(Keyword::CHECKSUM) {
+            // CHECKSUM [=] {0 | 1}
+            Ident::new("CHECKSUM")
+        } else if self.parse_keyword(Keyword::CONNECTION) {
+            // CONNECTION [=] 'connect_string'
+            Ident::new("CONNECTION")
+        } else if self.parse_keyword(Keyword::ENGINE_ATTRIBUTE) {
+            // ENGINE_ATTRIBUTE [=] 'string'
+            Ident::new("ENGINE_ATTRIBUTE")
+        } else if self.parse_keyword(Keyword::PASSWORD) {
+            // PASSWORD [=] 'string'
+            Ident::new("PASSWORD")
+        } else if self.parse_keyword(Keyword::SECONDARY_ENGINE_ATTRIBUTE) {
+            // SECONDARY_ENGINE_ATTRIBUTE [=] 'string'
+            Ident::new("SECONDARY_ENGINE_ATTRIBUTE")
+        } else if self.parse_keyword(Keyword::INSERT_METHOD) {
+            // INSERT_METHOD [=] { NO | FIRST | LAST }
+            Ident::new("INSERT_METHOD")
+        } else if self.parse_keyword(Keyword::AUTO_INCREMENT) {
+            Ident::new("AUTO_INCREMENT")

Review Comment:
   > the term like "DEFAULT CHARACTER SET" is parsed into an identifier
   
   Ah, so to clarify what I meant: I was reiterating my [previous 
comment 
here](https://github.com/apache/datafusion-sqlparser-rs/pull/1747#discussion_r2011373382),
 in that the two- or three-word keys like 'DEFAULT CHARACTER SET' are handled 
explicitly, whereas the majority are single identifiers and can be handled with 
the one-liner as above.
   
   So essentially, in the code we handle the special options like `DEFAULT 
CHARACTER SET` explicitly, and the rest are covered by the catch-all `let 
key = self.parse_identifier()`. Do you mean that would not work?
   
   Similarly for `COLLATE`
   
   > In the context of "plain table options" (mostly mysql), signed numbers 
are not applicable
   
   I think this would be acceptable, and downstream crates may validate 
signedness if necessary.
   
   >  i have looked into moving the collate handling into a more specific 
location, but this seems beyond the scope of this PR
   
   Not sure I understand; I didn't imagine something like that would be 
necessary. What I figured was that the collate option would be handled 
explicitly, similar to the current code `if self.parse_keyword(Keyword::COLLATE) 
{ ... }`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to