iffyio commented on code in PR #1809: URL: https://github.com/apache/datafusion-sqlparser-rs/pull/1809#discussion_r2050229485
########## tests/sqlparser_mssql.rs: ########## @@ -2053,3 +2054,171 @@ fn parse_drop_trigger() { } ); } + +#[test] +fn parse_mssql_go_keyword() { + let single_go_keyword = "USE some_database;\nGO"; Review Comment: Can we have one of the test cases use `ms().one_statement_parses_to(sql, canonical)`? looking at the tests its unclear what the behavior of the parser is in terms of serializing an AST that has GO statements in it, and that it needs special behavior to inject new lines manually I imagine ########## tests/sqlparser_mssql.rs: ########## @@ -2053,3 +2054,171 @@ fn parse_drop_trigger() { } ); } + +#[test] +fn parse_mssql_go_keyword() { + let single_go_keyword = "USE some_database;\nGO"; + let stmts = ms().parse_sql_statements(single_go_keyword).unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: None }),); + + let go_with_count = "SELECT 1;\nGO 5"; + let stmts = ms().parse_sql_statements(go_with_count).unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(5) })); + + let go_statement_delimiter = "SELECT 1\nGO"; + let stmts = ms().parse_sql_statements(go_statement_delimiter).unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); + + let bare_go = "GO"; + let stmts = ms().parse_sql_statements(bare_go).unwrap(); + assert_eq!(stmts.len(), 1); + assert_eq!(stmts[0], Statement::Go(GoStatement { count: None })); + + let go_then_statements = "/* whitespace */ GO\nRAISERROR('This is a test', 16, 1);"; + let stmts = ms().parse_sql_statements(go_then_statements).unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[0], Statement::Go(GoStatement { count: None })); + assert_eq!( + stmts[1], + Statement::RaisError { + message: Box::new(Expr::Value( + (Value::SingleQuotedString("This is a test".to_string())).with_empty_span() + )), + severity: Box::new(Expr::Value(number("16").with_empty_span())), + state: Box::new(Expr::Value(number("1").with_empty_span())), + arguments: vec![], + options: vec![], + } + ); + + let multiple_gos = "SELECT 1;\nGO 5\nSELECT 2;\n GO"; + let stmts = ms().parse_sql_statements(multiple_gos).unwrap(); + assert_eq!(stmts.len(), 4); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: Some(5) })); + assert_eq!(stmts[3], Statement::Go(GoStatement { count: None })); + + let single_line_comment_preceding_go = "USE some_database; -- okay\nGO"; + let stmts = ms() + .parse_sql_statements(single_line_comment_preceding_go) + .unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); + + let multi_line_comment_preceding_go = "USE some_database; /* okay */\nGO"; + let stmts = ms() + .parse_sql_statements(multi_line_comment_preceding_go) + .unwrap(); + assert_eq!(stmts.len(), 2); + assert_eq!(stmts[1], Statement::Go(GoStatement { count: None })); + + let comment_following_go = "USE some_database;\nGO -- okay"; Review Comment: maybe we can add a `multine_line_comment_following_go` test case? e.g. `GO /* whitespace */ 42` ########## src/dialect/mssql.rs: ########## @@ -116,7 +116,29 @@ impl Dialect for MsSqlDialect { true } - fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool { + fn is_column_alias(&self, kw: &Keyword, parser: &mut Parser) -> bool { + // if: + // - keyword is `GO`, and + // - looking backwards there's only (any) whitespace preceded by a newline + // then: `GO` iSN'T a column alias + if kw == &Keyword::GO { + let mut look_back_count = 2; + loop { Review Comment: Can we have this logic as a helper function? ``` rust fn prev_nth_token_no_skip(&self, mut n: usize) -> Option<&TokenWithSpan> { // ... } ``` like a combination of [peek_nth_token](https://github.com/apache/datafusion-sqlparser-rs/blob/ed181fde55fa090b42f047bb0fbb84cf494904ef/src/parser/mod.rs#L3922C5-L3922C69) and [prev_token](https://github.com/apache/datafusion-sqlparser-rs/blob/ed181fde55fa090b42f047bb0fbb84cf494904ef/src/parser/mod.rs#L4034) ########## src/ast/mod.rs: ########## @@ -4054,6 +4054,12 @@ pub enum Statement { arguments: Vec<Expr>, options: Vec<RaisErrorOption>, }, + /// Go (MSSQL) + /// + /// GO is not a Transact-SQL statement; it is a command recognized by various tools as a batch delimiter + /// + /// See <https://learn.microsoft.com/en-us/sql/t-sql/language-elements/sql-server-utilities-statements-go> Review Comment: For visibility we can move the comment to the struct definition similar to the print statement? ########## src/dialect/mssql.rs: ########## @@ -116,7 +116,29 @@ impl Dialect for MsSqlDialect { true } - fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool { + fn is_column_alias(&self, kw: &Keyword, parser: &mut Parser) -> bool { + // if: + // - keyword is `GO`, and + // - looking backwards there's only (any) whitespace preceded by a newline + // then: `GO` iSN'T a column alias + if kw == &Keyword::GO { + let mut look_back_count = 2; Review Comment: double checking, why does the index start at 2 vs 1? ########## src/parser/mod.rs: ########## @@ -15017,6 +15026,48 @@ impl<'a> Parser<'a> { } } + fn parse_go(&mut self) -> Result<Statement, ParserError> { + // previous token should be a newline (skipping non-newline whitespace) Review Comment: Ah interesting, I think it would be very helpful to add this example to the code to illustrate the implications, Then for the function, I think using the `peek_token_no_skip` could simplify the impl. Something roughly like the following? to avoid manually tracking token counts and lookbacks ```rust let count = loop { match self.peek_token_no_skip() { Token::Whitespace(Whitespace::Newline) | Token::Whitespace(Whitespace::SingleLineComment) | Token::EOF => { self.advance_token(); break None } Token::Whitespace(_) => { self.advance_token(); } _ => { break Some(self.parse_number()) } } }; ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org