iffyio commented on code in PR #1628: URL: https://github.com/apache/datafusion-sqlparser-rs/pull/1628#discussion_r1903243487
########## src/ast/dml.rs: ########## @@ -547,7 +561,15 @@ impl Display for Insert { write!(f, "{source}")?; } - if self.source.is_none() && self.columns.is_empty() { + if let Some(settings) = &self.settings { + write!(f, "SETTINGS {} ", display_comma_separated(settings))?; + } + + if let Some(format_clause) = &self.format_clause { + write!(f, "{format_clause}")?; + } + + if self.source.is_none() && self.columns.is_empty() && self.format_clause.is_none() { Review Comment: ```suggestion if self.source.is_none() && self.columns.is_empty() { ``` thinking the format isn't necessarily tied to the columns? ########## src/keywords.rs: ########## @@ -931,7 +931,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::PREWHERE, // for ClickHouse SELECT * FROM t SETTINGS ... Keyword::SETTINGS, - // for ClickHouse SELECT * FROM t FORMAT... + // for ClickHouse SELECT * FROM t FORMAT... or INSERT INTO t FORMAT... Review Comment: ```suggestion // for ClickHouse SELECT * FROM t FORMAT... ``` we could also remove the full description, the keyword isn't really tied to any specific syntax/dialect ########## tests/sqlparser_clickhouse.rs: ########## @@ -1398,6 +1401,27 @@ fn test_query_with_format_clause() { } } +#[test] +fn test_insert_query_with_format_clause() { + let cases = [ + r#"INSERT INTO tbl FORMAT JSONEachRow {"id": 1, "value": "foo"}, {"id": 2, "value": "bar"}"#, + r#"INSERT INTO tbl FORMAT JSONEachRow ["first", "second", "third"]"#, + r#"INSERT INTO tbl FORMAT JSONEachRow [{"first": 1}]"#, + r#"INSERT INTO tbl FORMAT jsoneachrow {"id": 1}"#, + r#"INSERT INTO tbl (foo) FORMAT JSONAsObject {"foo": {"bar": {"x": "y"}, "baz": 1}}"#, Review Comment: can we dedupe some of the test cases that seem to check case sensitivity? unless the impl is doing special case sensitive handling (which doesnt seem to be the case) I imagine no need to explicitly test for it ########## src/ast/query.rs: ########## @@ -2465,14 +2465,25 @@ impl fmt::Display for GroupByExpr { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FormatClause { - Identifier(Ident), + Identifier { + ident: Ident, + expr: Option<Vec<Expr>>, + }, Null, } Review Comment: it doesn't look correct to reuse this struct? The existing FormatClause is currently used on a query which has different variants/syntax than on insert? maybe we can do something like ```rust struct OutputFormatClause { name: Ident values: Vec<Expr> } ``` can we also add doc comments describing the struct/properties with a link to the doc? https://clickhouse.com/docs/en/sql-reference/formats ########## src/ast/dml.rs: ########## @@ -495,6 +495,20 @@ pub struct Insert { pub priority: Option<MysqlInsertPriority>, /// Only for mysql pub insert_alias: Option<InsertAliases>, + /// Settings used in together with a specified `FORMAT`. Review Comment: hmm double checking if this line is correct, is it required that a format clause is present if setting is present? Maybe we can avoid describing semantics in any case, we just mentiong that this is clickhouse settings with a link to the docs ########## src/parser/mod.rs: ########## @@ -11931,10 +11949,41 @@ impl<'a> Parser<'a> { replace_into, priority, insert_alias, + settings, + format_clause, })) } } + // Parses format clause used for [ClickHouse]. Formats are different when using `SELECT` and + // `INSERT` and also when using the CLI for pipes. It may or may not take an additional + // expression after the format so we try to parse the expression but allow failure. + // + // Since we know we never take an additional expression in `SELECT` context we never only try + // to parse if `can_have_expression` is true. + // + // <https://clickhouse.com/docs/en/interfaces/formats> Review Comment: ah so per earlier comment I'm imagining if we use different representations for the format clauses the impl isn't affected? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org