This is an automated email from the ASF dual-hosted git repository.

iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 0c3b6c09 Add support for ClickHouse `FORMAT` on `INSERT` (#1628)
0c3b6c09 is described below

commit 0c3b6c09740389064f28fd4db548da3085034269
Author: Simon Sawert <[email protected]>
AuthorDate: Fri Jan 10 18:17:28 2025 +0100

    Add support for ClickHouse `FORMAT` on `INSERT` (#1628)
---
 src/ast/dml.rs                | 31 +++++++++++++----
 src/ast/mod.rs                |  8 ++---
 src/ast/query.rs              | 23 +++++++++++++
 src/ast/spans.rs              |  2 ++
 src/dialect/clickhouse.rs     | 12 +++++++
 src/dialect/mod.rs            |  5 +++
 src/keywords.rs               |  2 --
 src/parser/mod.rs             | 78 +++++++++++++++++++++++++++++++------------
 tests/sqlparser_clickhouse.rs | 20 +++++++++++
 tests/sqlparser_postgres.rs   | 10 ++++--
 10 files changed, 155 insertions(+), 36 deletions(-)

diff --git a/src/ast/dml.rs b/src/ast/dml.rs
index d68a2277..de555c10 100644
--- a/src/ast/dml.rs
+++ b/src/ast/dml.rs
@@ -32,11 +32,11 @@ use sqlparser_derive::{Visit, VisitMut};
 pub use super::ddl::{ColumnDef, TableConstraint};
 
 use super::{
-    display_comma_separated, display_separated, Assignment, ClusteredBy, 
CommentDef, Expr,
-    FileFormat, FromTable, HiveDistributionStyle, HiveFormat, HiveIOFormat, 
HiveRowFormat, Ident,
-    InsertAliases, MysqlInsertPriority, ObjectName, OnCommit, OnInsert, 
OneOrManyWithParens,
-    OrderByExpr, Query, RowAccessPolicy, SelectItem, SqlOption, 
SqliteOnConflict, TableEngine,
-    TableObject, TableWithJoins, Tag, WrappedCollection,
+    display_comma_separated, display_separated, query::InputFormatClause, 
Assignment, ClusteredBy,
+    CommentDef, Expr, FileFormat, FromTable, HiveDistributionStyle, 
HiveFormat, HiveIOFormat,
+    HiveRowFormat, Ident, InsertAliases, MysqlInsertPriority, ObjectName, 
OnCommit, OnInsert,
+    OneOrManyWithParens, OrderByExpr, Query, RowAccessPolicy, SelectItem, 
Setting, SqlOption,
+    SqliteOnConflict, TableEngine, TableObject, TableWithJoins, Tag, 
WrappedCollection,
 };
 
 /// CREATE INDEX statement.
@@ -497,6 +497,19 @@ pub struct Insert {
     pub priority: Option<MysqlInsertPriority>,
     /// Only for mysql
     pub insert_alias: Option<InsertAliases>,
+    /// Settings used for ClickHouse.
+    ///
+    /// ClickHouse syntax: `INSERT INTO tbl SETTINGS format_template_resultset 
= '/some/path/resultset.format'`
+    ///
+    /// [ClickHouse `INSERT 
INTO`](https://clickhouse.com/docs/en/sql-reference/statements/insert-into)
+    pub settings: Option<Vec<Setting>>,
+    /// Format for `INSERT` statement when not using standard SQL format. Can 
be e.g. `CSV`,
+    /// `JSON`, `JSONAsString`, `LineAsString` and more.
+    ///
+    /// ClickHouse syntax: `INSERT INTO tbl FORMAT JSONEachRow {"foo": 1, 
"bar": 2}, {"foo": 3}`
+    ///
+    /// [ClickHouse formats JSON 
insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data)
+    pub format_clause: Option<InputFormatClause>,
 }
 
 impl Display for Insert {
@@ -545,12 +558,18 @@ impl Display for Insert {
             write!(f, "({}) ", display_comma_separated(&self.after_columns))?;
         }
 
+        if let Some(settings) = &self.settings {
+            write!(f, "SETTINGS {} ", display_comma_separated(settings))?;
+        }
+
         if let Some(source) = &self.source {
             write!(f, "{source}")?;
         } else if !self.assignments.is_empty() {
             write!(f, "SET ")?;
             write!(f, "{}", display_comma_separated(&self.assignments))?;
-        } else if self.source.is_none() && self.columns.is_empty() {
+        } else if let Some(format_clause) = &self.format_clause {
+            write!(f, "{format_clause}")?;
+        } else if self.columns.is_empty() {
             write!(f, "DEFAULT VALUES")?;
         }
 
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 5ab2fc93..1f8df352 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -61,10 +61,10 @@ pub use self::operator::{BinaryOperator, UnaryOperator};
 pub use self::query::{
     AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, 
EmptyMatchesMode,
     ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, Fetch, ForClause, 
ForJson, ForXml,
-    FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, 
IlikeSelectItem, Interpolate,
-    InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonTableColumn,
-    JsonTableColumnErrorHandling, JsonTableNamedColumn, JsonTableNestedColumn, 
LateralView,
-    LockClause, LockType, MatchRecognizePattern, MatchRecognizeSymbol, Measure,
+    FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, 
IlikeSelectItem,
+    InputFormatClause, Interpolate, InterpolateExpr, Join, JoinConstraint, 
JoinOperator,
+    JsonTableColumn, JsonTableColumnErrorHandling, JsonTableNamedColumn, 
JsonTableNestedColumn,
+    LateralView, LockClause, LockType, MatchRecognizePattern, 
MatchRecognizeSymbol, Measure,
     NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, OffsetRows, 
OpenJsonTableColumn,
     OrderBy, OrderByExpr, PivotValueSource, ProjectionSelect, Query, 
RenameSelectItem,
     RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, 
RowsPerMatch, Select,
diff --git a/src/ast/query.rs b/src/ast/query.rs
index 2f0663a5..e7020ae2 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -2480,6 +2480,29 @@ impl fmt::Display for FormatClause {
     }
 }
 
+/// FORMAT identifier in input context, specific to ClickHouse.
+///
+/// [ClickHouse]: <https://clickhouse.com/docs/en/interfaces/formats>
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub struct InputFormatClause {
+    pub ident: Ident,
+    pub values: Vec<Expr>,
+}
+
+impl fmt::Display for InputFormatClause {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "FORMAT {}", self.ident)?;
+
+        if !self.values.is_empty() {
+            write!(f, " {}", display_comma_separated(self.values.as_slice()))?;
+        }
+
+        Ok(())
+    }
+}
+
 /// FOR XML or FOR JSON clause, specific to MSSQL
 /// (formats the output of a query as XML or JSON)
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index 8a27c4ac..19f6074b 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -1156,6 +1156,8 @@ impl Spanned for Insert {
             priority: _,     // todo, mysql specific
             insert_alias: _, // todo, mysql specific
             assignments,
+            settings: _,      // todo, clickhouse specific
+            format_clause: _, // todo, clickhouse specific
         } = self;
 
         union_spans(
diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs
index 267f766f..884dfcbc 100644
--- a/src/dialect/clickhouse.rs
+++ b/src/dialect/clickhouse.rs
@@ -54,4 +54,16 @@ impl Dialect for ClickHouseDialect {
     fn supports_insert_table_function(&self) -> bool {
         true
     }
+
+    fn supports_insert_format(&self) -> bool {
+        true
+    }
+
+    // ClickHouse uses this for some FORMAT expressions in `INSERT` context, 
e.g. when inserting
+    // with FORMAT JSONEachRow a raw JSON key-value expression is valid and 
expected.
+    //
+    // [ClickHouse formats](https://clickhouse.com/docs/en/interfaces/formats)
+    fn supports_dictionary_syntax(&self) -> bool {
+        true
+    }
 }
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index a682e4f6..32b0ed48 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -797,6 +797,11 @@ pub trait Dialect: Debug + Any {
     fn supports_insert_table_function(&self) -> bool {
         false
     }
+
+    /// Does the dialect support insert formats, e.g. `INSERT INTO ... FORMAT 
<format>`
+    fn supports_insert_format(&self) -> bool {
+        false
+    }
 }
 
 /// This represents the operators for which precedence must be defined
diff --git a/src/keywords.rs b/src/keywords.rs
index bd538ec6..8c8860e1 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -949,9 +949,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
     Keyword::PARTITION,
     // for Clickhouse PREWHERE
     Keyword::PREWHERE,
-    // for ClickHouse SELECT * FROM t SETTINGS ...
     Keyword::SETTINGS,
-    // for ClickHouse SELECT * FROM t FORMAT...
     Keyword::FORMAT,
     // for Snowflake START WITH .. CONNECT BY
     Keyword::START,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index b6e3fd1c..c1740251 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -12033,35 +12033,55 @@ impl<'a> Parser<'a> {
 
             let is_mysql = dialect_of!(self is MySqlDialect);
 
-            let (columns, partitioned, after_columns, source, assignments) =
-                if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) {
-                    (vec![], None, vec![], None, vec![])
-                } else {
-                    let (columns, partitioned, after_columns) = if 
!self.peek_subquery_start() {
-                        let columns = 
self.parse_parenthesized_column_list(Optional, is_mysql)?;
+            let (columns, partitioned, after_columns, source, assignments) = 
if self
+                .parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES])
+            {
+                (vec![], None, vec![], None, vec![])
+            } else {
+                let (columns, partitioned, after_columns) = if 
!self.peek_subquery_start() {
+                    let columns = 
self.parse_parenthesized_column_list(Optional, is_mysql)?;
 
-                        let partitioned = self.parse_insert_partition()?;
-                        // Hive allows you to specify columns after partitions 
as well if you want.
-                        let after_columns = if dialect_of!(self is 
HiveDialect) {
-                            self.parse_parenthesized_column_list(Optional, 
false)?
-                        } else {
-                            vec![]
-                        };
-                        (columns, partitioned, after_columns)
+                    let partitioned = self.parse_insert_partition()?;
+                    // Hive allows you to specify columns after partitions as 
well if you want.
+                    let after_columns = if dialect_of!(self is HiveDialect) {
+                        self.parse_parenthesized_column_list(Optional, false)?
                     } else {
-                        Default::default()
+                        vec![]
                     };
+                    (columns, partitioned, after_columns)
+                } else {
+                    Default::default()
+                };
 
-                    let (source, assignments) =
-                        if self.dialect.supports_insert_set() && 
self.parse_keyword(Keyword::SET) {
-                            (None, 
self.parse_comma_separated(Parser::parse_assignment)?)
-                        } else {
-                            (Some(self.parse_query()?), vec![])
-                        };
+                let (source, assignments) = if 
self.peek_keyword(Keyword::FORMAT)
+                    || self.peek_keyword(Keyword::SETTINGS)
+                {
+                    (None, vec![])
+                } else if self.dialect.supports_insert_set() && 
self.parse_keyword(Keyword::SET) {
+                    (None, 
self.parse_comma_separated(Parser::parse_assignment)?)
+                } else {
+                    (Some(self.parse_query()?), vec![])
+                };
+
+                (columns, partitioned, after_columns, source, assignments)
+            };
 
-                    (columns, partitioned, after_columns, source, assignments)
+            let (format_clause, settings) = if 
self.dialect.supports_insert_format() {
+                // Settings always comes before `FORMAT` for ClickHouse:
+                // 
<https://clickhouse.com/docs/en/sql-reference/statements/insert-into>
+                let settings = self.parse_settings()?;
+
+                let format = if self.parse_keyword(Keyword::FORMAT) {
+                    Some(self.parse_input_format_clause()?)
+                } else {
+                    None
                 };
 
+                (format, settings)
+            } else {
+                Default::default()
+            };
+
             let insert_alias = if dialect_of!(self is MySqlDialect | 
GenericDialect)
                 && self.parse_keyword(Keyword::AS)
             {
@@ -12146,10 +12166,24 @@ impl<'a> Parser<'a> {
                 replace_into,
                 priority,
                 insert_alias,
+                settings,
+                format_clause,
             }))
         }
     }
 
+    // Parses input format clause used for [ClickHouse].
+    //
+    // <https://clickhouse.com/docs/en/interfaces/formats>
+    pub fn parse_input_format_clause(&mut self) -> Result<InputFormatClause, 
ParserError> {
+        let ident = self.parse_identifier()?;
+        let values = self
+            .maybe_parse(|p| p.parse_comma_separated(|p| p.parse_expr()))?
+            .unwrap_or_default();
+
+        Ok(InputFormatClause { ident, values })
+    }
+
     /// Returns true if the immediate tokens look like the
     /// beginning of a subquery. `(SELECT ...`
     fn peek_subquery_start(&mut self) -> bool {
diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs
index 4fa657ba..fed4308f 100644
--- a/tests/sqlparser_clickhouse.rs
+++ b/tests/sqlparser_clickhouse.rs
@@ -1404,6 +1404,26 @@ fn test_query_with_format_clause() {
     }
 }
 
+#[test]
+fn test_insert_query_with_format_clause() {
+    let cases = [
+        r#"INSERT INTO tbl FORMAT JSONEachRow {"id": 1, "value": "foo"}, 
{"id": 2, "value": "bar"}"#,
+        r#"INSERT INTO tbl FORMAT JSONEachRow ["first", "second", "third"]"#,
+        r#"INSERT INTO tbl FORMAT JSONEachRow [{"first": 1}]"#,
+        r#"INSERT INTO tbl (foo) FORMAT JSONAsObject {"foo": {"bar": {"x": 
"y"}, "baz": 1}}"#,
+        r#"INSERT INTO tbl (foo, bar) FORMAT JSON {"foo": 1, "bar": 2}"#,
+        r#"INSERT INTO tbl FORMAT CSV col1, col2, col3"#,
+        r#"INSERT INTO tbl FORMAT LineAsString "I love apple", "I love 
banana", "I love orange""#,
+        r#"INSERT INTO tbl (foo) SETTINGS 
input_format_json_read_bools_as_numbers = true FORMAT JSONEachRow {"id": 1, 
"value": "foo"}"#,
+        r#"INSERT INTO tbl SETTINGS format_template_resultset = 
'/some/path/resultset.format', format_template_row = '/some/path/row.format' 
FORMAT Template"#,
+        r#"INSERT INTO tbl SETTINGS input_format_json_read_bools_as_numbers = 
true FORMAT JSONEachRow {"id": 1, "value": "foo"}"#,
+    ];
+
+    for sql in &cases {
+        clickhouse().verified_stmt(sql);
+    }
+}
+
 #[test]
 fn parse_create_table_on_commit_and_as_query() {
     let sql = r#"CREATE LOCAL TEMPORARY TABLE test ON COMMIT PRESERVE ROWS AS 
SELECT 1"#;
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index ce31a062..864fb5eb 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -4431,7 +4431,9 @@ fn test_simple_postgres_insert_with_alias() {
             returning: None,
             replace_into: false,
             priority: None,
-            insert_alias: None
+            insert_alias: None,
+            settings: None,
+            format_clause: None,
         })
     )
 }
@@ -4502,7 +4504,9 @@ fn test_simple_postgres_insert_with_alias() {
             returning: None,
             replace_into: false,
             priority: None,
-            insert_alias: None
+            insert_alias: None,
+            settings: None,
+            format_clause: None,
         })
     )
 }
@@ -4570,6 +4574,8 @@ fn test_simple_insert_with_quoted_alias() {
             replace_into: false,
             priority: None,
             insert_alias: None,
+            settings: None,
+            format_clause: None,
         })
     )
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to