iffyio commented on code in PR #1808: URL: https://github.com/apache/datafusion-sqlparser-rs/pull/1808#discussion_r2046079970
########## src/ast/spans.rs: ########## @@ -777,11 +778,9 @@ impl Spanned for ConditionalStatements { ConditionalStatements::Sequence { statements } => { union_spans(statements.iter().map(|s| s.span())) } - ConditionalStatements::BeginEnd { - begin_token: AttachedToken(start), - statements: _, - end_token: AttachedToken(end), - } => union_spans([start.span, end.span].into_iter()), + ConditionalStatements::BeginEnd(bes) => { + union_spans([bes.begin_token.0.span, bes.end_token.0.span].into_iter()) Review Comment: Can we `impl Spanned for BeginEndStatements` and then call `bes.span()` here instead? That would make the span impl reusable by nother nodes that embed the `BeginEndStatements` ########## tests/sqlparser_mssql.rs: ########## @@ -187,6 +188,386 @@ fn parse_mssql_create_procedure() { let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN UPDATE bar SET col = 'test'; SELECT [foo] FROM BAR WHERE [FOO] > 10 END"); } +#[test] +fn parse_create_function() { + let return_expression_function = "CREATE FUNCTION some_scalar_udf(@foo INT, @bar VARCHAR(256)) RETURNS INT AS BEGIN RETURN 1; END"; + assert_eq!( + ms().verified_stmt(return_expression_function), + sqlparser::ast::Statement::CreateFunction(CreateFunction { + or_alter: false, + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName::from(vec![Ident { + value: "some_scalar_udf".into(), + quote_style: None, + span: Span::empty(), + }]), + args: Some(vec![ + OperateFunctionArg { + mode: None, + name: Some(Ident { + value: "@foo".into(), + quote_style: None, + span: Span::empty(), + }), Review Comment: ```suggestion name: Some(Ident::new("@foo")), ``` I think the idents in the tests can be simplified with the following ########## src/ast/mod.rs: ########## @@ -2317,15 +2313,37 @@ impl fmt::Display for ConditionalStatements { } Ok(()) } - ConditionalStatements::BeginEnd { statements, .. } => { - write!(f, "BEGIN ")?; - format_statement_list(f, statements)?; - write!(f, " END") - } + ConditionalStatements::BeginEnd(bes) => write!(f, "{}", bes), } } } +/// A shared representation of `BEGIN`, multiple statements, and `END` tokens. Review Comment: ```suggestion /// Represents a list of statements enclosed within `BEGIN` and `END` keywords. /// Example: /// ```sql /// BEGIN /// SELECT 1; /// SELECT 2; /// END /// ``` ``` ########## tests/sqlparser_mssql.rs: ########## @@ -187,6 +188,386 @@ fn parse_mssql_create_procedure() { let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN UPDATE bar SET col = 'test'; SELECT [foo] FROM BAR WHERE [FOO] > 10 END"); } +#[test] +fn parse_create_function() { + let return_expression_function = "CREATE FUNCTION some_scalar_udf(@foo INT, @bar VARCHAR(256)) RETURNS INT AS BEGIN RETURN 1; END"; + assert_eq!( + ms().verified_stmt(return_expression_function), + sqlparser::ast::Statement::CreateFunction(CreateFunction { + or_alter: false, + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName::from(vec![Ident { + value: "some_scalar_udf".into(), + quote_style: None, + span: Span::empty(), + }]), + args: Some(vec![ + OperateFunctionArg { + mode: None, + name: Some(Ident { + value: "@foo".into(), + quote_style: None, + span: Span::empty(), + }), + data_type: DataType::Int(None), + default_expr: None, + }, + OperateFunctionArg { + mode: None, + name: Some(Ident { + value: "@bar".into(), + quote_style: None, + span: Span::empty(), + }), + data_type: DataType::Varchar(Some(CharacterLength::IntegerLength { + length: 256, + unit: None + })), + default_expr: None, + }, + ]), + return_type: Some(DataType::Int(None)), + function_body: Some(CreateFunctionBody::AsBeginEnd(BeginEndStatements { + begin_token: AttachedToken(TokenWithSpan::wrap(sqlparser::tokenizer::Token::Word( + sqlparser::tokenizer::Word { + value: "BEGIN".to_string(), + quote_style: None, + keyword: Keyword::BEGIN + } + ))), + statements: vec![Statement::Return(ReturnStatement { + value: Some(ReturnStatementValue::Expr(Expr::Value( + (number("1")).with_empty_span() + ))), + }),], + end_token: AttachedToken(TokenWithSpan::wrap(sqlparser::tokenizer::Token::Word( + sqlparser::tokenizer::Word { + value: "END".to_string(), + quote_style: None, + keyword: Keyword::END + } + ))), + })), + behavior: None, + called_on_null: None, + parallel: None, + using: None, + language: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }), + ); + + let multi_statement_function = "\ Review Comment: to simplify the tests, I think we can have one of the test cases like `return_expression_function` as is, which asserts the returned AST. Then for the others, it should be enough to rely on `verified_stmt`, without fully asserting the AST. the latter has significant overhead maintenance wise so that ideally we use them more sparingly. So essentially thinking for `multi_statement_function`, `create_function_with_conditional` and `create_or_alter_function` we just call `verified_stmt(sql)` to assert behavior. Then the `create_or_alter_function` test case I think we can move into this function since its part of the same feature ########## src/ast/ddl.rs: ########## @@ -2272,6 +2277,10 @@ impl fmt::Display for CreateFunction { if let Some(CreateFunctionBody::AsAfterOptions(function_body)) = &self.function_body { write!(f, " AS {function_body}")?; } + if let Some(CreateFunctionBody::AsBeginEnd(bes)) = &self.function_body { + write!(f, " AS")?; + write!(f, " {}", bes)?; Review Comment: ```suggestion write!(f, " AS {bes}")?; ``` ########## src/ast/mod.rs: ########## @@ -9211,6 +9253,41 @@ pub enum CopyIntoSnowflakeKind { Location, } +/// Return (MsSql) +/// +/// for Functions: +/// RETURN scalar_expression +/// +/// See <https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql> +/// +/// for Triggers: +/// RETURN +/// +/// See <https://learn.microsoft.com/en-us/sql/t-sql/statements/create-trigger-transact-sql> Review Comment: ```suggestion /// Represents a `Return` statement. /// /// [MsSql triggers](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-trigger-transact-sql) /// [MySql functions](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql) ``` this makes it possible to include docs to other dialects like bigquery/snowflake that support this statement ########## src/parser/mod.rs: ########## @@ -15017,6 +15075,13 @@ impl<'a> Parser<'a> { } } + fn parse_return(&mut self) -> Result<Statement, ParserError> { + let expr = self.parse_expr()?; Review Comment: Ah alright, in that case I think its only the test cases missing for the return statement we can add those to common.rs I imagine ########## tests/sqlparser_mssql.rs: ########## @@ -187,6 +188,386 @@ fn parse_mssql_create_procedure() { let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN UPDATE bar SET col = 'test'; SELECT [foo] FROM BAR WHERE [FOO] > 10 END"); } +#[test] +fn parse_create_function() { + let return_expression_function = "CREATE FUNCTION some_scalar_udf(@foo INT, @bar VARCHAR(256)) RETURNS INT AS BEGIN RETURN 1; END"; + assert_eq!( + ms().verified_stmt(return_expression_function), + sqlparser::ast::Statement::CreateFunction(CreateFunction { + or_alter: false, + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName::from(vec![Ident { + value: "some_scalar_udf".into(), + quote_style: None, + span: Span::empty(), + }]), + args: Some(vec![ + OperateFunctionArg { + mode: None, + name: Some(Ident { + value: "@foo".into(), + quote_style: None, + span: Span::empty(), + }), + data_type: DataType::Int(None), + default_expr: None, + }, + OperateFunctionArg { + mode: None, + name: Some(Ident { + value: "@bar".into(), + quote_style: None, + span: Span::empty(), + }), + data_type: DataType::Varchar(Some(CharacterLength::IntegerLength { + length: 256, + unit: None + })), + default_expr: None, + }, + ]), + return_type: Some(DataType::Int(None)), + function_body: Some(CreateFunctionBody::AsBeginEnd(BeginEndStatements { + begin_token: AttachedToken(TokenWithSpan::wrap(sqlparser::tokenizer::Token::Word( + sqlparser::tokenizer::Word { + value: "BEGIN".to_string(), + quote_style: None, + keyword: Keyword::BEGIN + } + ))), Review Comment: ```suggestion begin_token: AttachedToken::empty(), ``` for tests in places where a token is needed, we can use this instead. since there is no Eq impl on the `AttachedToken` type ########## tests/sqlparser_mssql.rs: ########## @@ -187,6 +188,386 @@ fn parse_mssql_create_procedure() { let _ = ms().verified_stmt("CREATE PROCEDURE [foo] AS BEGIN UPDATE bar SET col = 'test'; SELECT [foo] FROM BAR WHERE [FOO] > 10 END"); } +#[test] +fn parse_create_function() { + let return_expression_function = "CREATE FUNCTION some_scalar_udf(@foo INT, @bar VARCHAR(256)) RETURNS INT AS BEGIN RETURN 1; END"; + assert_eq!( + ms().verified_stmt(return_expression_function), + sqlparser::ast::Statement::CreateFunction(CreateFunction { + or_alter: false, + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName::from(vec![Ident { + value: "some_scalar_udf".into(), + quote_style: None, + span: Span::empty(), + }]), + args: Some(vec![ + OperateFunctionArg { + mode: None, + name: Some(Ident { + value: "@foo".into(), + quote_style: None, + span: Span::empty(), + }), + data_type: DataType::Int(None), + default_expr: None, + }, + OperateFunctionArg { + mode: None, + name: Some(Ident { + value: "@bar".into(), + quote_style: None, + span: Span::empty(), + }), + data_type: DataType::Varchar(Some(CharacterLength::IntegerLength { + length: 256, + unit: None + })), + default_expr: None, + }, + ]), + return_type: Some(DataType::Int(None)), + function_body: Some(CreateFunctionBody::AsBeginEnd(BeginEndStatements { + begin_token: AttachedToken(TokenWithSpan::wrap(sqlparser::tokenizer::Token::Word( + sqlparser::tokenizer::Word { + value: "BEGIN".to_string(), + quote_style: None, + keyword: Keyword::BEGIN + } + ))), + statements: vec![Statement::Return(ReturnStatement { + value: Some(ReturnStatementValue::Expr(Expr::Value( + (number("1")).with_empty_span() + ))), + }),], + end_token: AttachedToken(TokenWithSpan::wrap(sqlparser::tokenizer::Token::Word( + sqlparser::tokenizer::Word { + value: "END".to_string(), + quote_style: None, + keyword: Keyword::END + } + ))), + })), + behavior: None, + called_on_null: None, + parallel: None, + using: None, + language: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }), + ); + + let multi_statement_function = "\ + CREATE FUNCTION some_scalar_udf(@foo INT, @bar VARCHAR(256)) \ + RETURNS INT \ + AS \ + BEGIN \ + SET @foo = @foo + 1; \ + RETURN @foo; \ + END\ + "; + assert_eq!( + ms().verified_stmt(multi_statement_function), + sqlparser::ast::Statement::CreateFunction(CreateFunction { + or_alter: false, + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName::from(vec![Ident { + value: "some_scalar_udf".into(), + quote_style: None, + span: Span::empty(), + }]), + args: Some(vec![ + OperateFunctionArg { + mode: None, + name: Some(Ident { + value: "@foo".into(), + quote_style: None, + span: Span::empty(), + }), + data_type: DataType::Int(None), + default_expr: None, + }, + OperateFunctionArg { + mode: None, + name: Some(Ident { + value: "@bar".into(), + quote_style: None, + span: Span::empty(), + }), + data_type: DataType::Varchar(Some(CharacterLength::IntegerLength { + length: 256, + unit: None + })), + default_expr: None, + }, + ]), + return_type: Some(DataType::Int(None)), + function_body: Some(CreateFunctionBody::AsBeginEnd(BeginEndStatements { + begin_token: AttachedToken(TokenWithSpan::wrap(sqlparser::tokenizer::Token::Word( + sqlparser::tokenizer::Word { + value: "BEGIN".to_string(), + quote_style: None, + keyword: Keyword::BEGIN + } + ))), + statements: vec![ + Statement::Set(Set::SingleAssignment { + scope: None, + hivevar: false, + variable: ObjectName::from(vec!["@foo".into()]), + values: vec![sqlparser::ast::Expr::BinaryOp { + left: Box::new(sqlparser::ast::Expr::Identifier(Ident { + value: "@foo".to_string(), + quote_style: None, + span: Span::empty(), + })), + op: sqlparser::ast::BinaryOperator::Plus, + right: Box::new(Expr::Value(number("1").with_empty_span())), + }], + }), + Statement::Return(ReturnStatement { + value: Some(ReturnStatementValue::Expr(Expr::Identifier(Ident { + value: "@foo".into(), + quote_style: None, + span: Span::empty(), + }))), + }), + ], + end_token: AttachedToken(TokenWithSpan::wrap(sqlparser::tokenizer::Token::Word( + sqlparser::tokenizer::Word { + value: "END".to_string(), + quote_style: None, + keyword: Keyword::END + } + ))), + })), + behavior: None, + called_on_null: None, + parallel: None, + using: None, + language: None, + determinism_specifier: None, + options: None, + remote_connection: None, + }), + ); + + let create_function_with_conditional = r#" + CREATE FUNCTION some_scalar_udf() + RETURNS INT + AS + BEGIN + IF 1=2 + BEGIN + RETURN 1; + END + + RETURN 0; + END + "#; + let create_stmt = ms().one_statement_parses_to(create_function_with_conditional, ""); Review Comment: can we use `verified_stmt` here instead (what does the empty canonical string imply)? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org