This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 4f79997c Include DML keyword in statement span (#2090)
4f79997c is described below

commit 4f79997c8efa1c6089ef5795ac8317152205d8c0
Author: xitep <[email protected]>
AuthorDate: Thu Nov 13 10:27:58 2025 +0100

    Include DML keyword in statement span (#2090)
---
 src/ast/dml.rs              |  13 ++++--
 src/ast/spans.rs            | 111 ++++++++++++++++++++++++++++++++++++++------
 src/dialect/sqlite.rs       |   2 +-
 src/parser/mod.rs           |  51 +++++++++++++-------
 tests/sqlparser_common.rs   |   2 +
 tests/sqlparser_mysql.rs    |   1 +
 tests/sqlparser_postgres.rs |   3 ++
 tests/sqlparser_sqlite.rs   |   4 +-
 8 files changed, 150 insertions(+), 37 deletions(-)

diff --git a/src/ast/dml.rs b/src/ast/dml.rs
index c0bfcb19..d6009ce8 100644
--- a/src/ast/dml.rs
+++ b/src/ast/dml.rs
@@ -27,9 +27,10 @@ use sqlparser_derive::{Visit, VisitMut};
 use crate::display_utils::{indented_list, Indent, SpaceOrNewline};
 
 use super::{
-    display_comma_separated, query::InputFormatClause, Assignment, Expr, FromTable, Ident,
-    InsertAliases, MysqlInsertPriority, ObjectName, OnInsert, OrderByExpr, Query, SelectItem,
-    Setting, SqliteOnConflict, TableObject, TableWithJoins, UpdateTableFromKind,
+    display_comma_separated, helpers::attached_token::AttachedToken, query::InputFormatClause,
+    Assignment, Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert,
+    OrderByExpr, Query, SelectItem, Setting, SqliteOnConflict, TableObject, TableWithJoins,
+    UpdateTableFromKind,
 };
 
 /// INSERT statement.
@@ -37,6 +38,8 @@ use super::{
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
 pub struct Insert {
+    /// Token for the `INSERT` keyword (or its substitutes)
+    pub insert_token: AttachedToken,
     /// Only for Sqlite
     pub or: Option<SqliteOnConflict>,
     /// Only for mysql
@@ -179,6 +182,8 @@ impl Display for Insert {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
 pub struct Delete {
+    /// Token for the `DELETE` keyword
+    pub delete_token: AttachedToken,
     /// Multi tables delete are supported in mysql
     pub tables: Vec<ObjectName>,
     /// FROM
@@ -246,6 +251,8 @@ impl Display for Delete {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
 pub struct Update {
+    /// Token for the `UPDATE` keyword
+    pub update_token: AttachedToken,
     /// TABLE
     pub table: TableWithJoins,
     /// Column assignments
diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index 719e261c..3a4f1d02 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -839,6 +839,7 @@ impl Spanned for CopySource {
 impl Spanned for Delete {
     fn span(&self) -> Span {
         let Delete {
+            delete_token,
             tables,
             from,
             using,
@@ -849,19 +850,21 @@ impl Spanned for Delete {
         } = self;
 
         union_spans(
-            tables
-                .iter()
-                .map(|i| i.span())
-                .chain(core::iter::once(from.span()))
-                .chain(
-                    using
-                        .iter()
-                        .map(|u| union_spans(u.iter().map(|i| i.span()))),
-                )
-                .chain(selection.iter().map(|i| i.span()))
-                .chain(returning.iter().flat_map(|i| i.iter().map(|k| k.span())))
-                .chain(order_by.iter().map(|i| i.span()))
-                .chain(limit.iter().map(|i| i.span())),
+            core::iter::once(delete_token.0.span).chain(
+                tables
+                    .iter()
+                    .map(|i| i.span())
+                    .chain(core::iter::once(from.span()))
+                    .chain(
+                        using
+                            .iter()
+                            .map(|u| union_spans(u.iter().map(|i| i.span()))),
+                    )
+                    .chain(selection.iter().map(|i| i.span()))
+                    .chain(returning.iter().flat_map(|i| i.iter().map(|k| k.span())))
+                    .chain(order_by.iter().map(|i| i.span()))
+                    .chain(limit.iter().map(|i| i.span())),
+            ),
         )
     }
 }
@@ -869,6 +872,7 @@ impl Spanned for Delete {
 impl Spanned for Update {
     fn span(&self) -> Span {
         let Update {
+            update_token,
             table,
             assignments,
             from,
@@ -880,6 +884,7 @@ impl Spanned for Update {
 
         union_spans(
             core::iter::once(table.span())
+                .chain(core::iter::once(update_token.0.span))
                 .chain(assignments.iter().map(|i| i.span()))
                 .chain(from.iter().map(|i| i.span()))
                 .chain(selection.iter().map(|i| i.span()))
@@ -1217,6 +1222,7 @@ impl Spanned for AlterIndexOperation {
 impl Spanned for Insert {
     fn span(&self) -> Span {
         let Insert {
+            insert_token,
             or: _,     // enum, sqlite specific
             ignore: _, // bool
             into: _,   // bool
@@ -1239,7 +1245,8 @@ impl Spanned for Insert {
         } = self;
 
         union_spans(
-            core::iter::once(table.span())
+            core::iter::once(insert_token.0.span)
+                .chain(core::iter::once(table.span()))
                 .chain(table_alias.as_ref().map(|i| i.span))
                 .chain(columns.iter().map(|i| i.span))
                 .chain(source.as_ref().map(|q| q.span()))
@@ -2540,4 +2547,80 @@ ALTER TABLE users
         assert_eq!(stmt_span.start, (2, 13).into());
         assert_eq!(stmt_span.end, (4, 11).into());
     }
+
+    #[test]
+    fn test_update_statement_span() {
+        let sql = r#"-- foo
+      UPDATE foo
+   /* bar */
+   SET bar = 3
+ WHERE quux > 42 ;
+"#;
+
+        let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap();
+        assert_eq!(1, r.len());
+
+        let stmt_span = r[0].span();
+
+        assert_eq!(stmt_span.start, (2, 7).into());
+        assert_eq!(stmt_span.end, (5, 17).into());
+    }
+
+    #[test]
+    fn test_insert_statement_span() {
+        let sql = r#"
+/* foo */ INSERT  INTO  FOO  (X, Y, Z)
+  SELECT 1, 2, 3
+  FROM DUAL
+;"#;
+
+        let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap();
+        assert_eq!(1, r.len());
+
+        let stmt_span = r[0].span();
+
+        assert_eq!(stmt_span.start, (2, 11).into());
+        assert_eq!(stmt_span.end, (4, 12).into());
+    }
+
+    #[test]
+    fn test_replace_statement_span() {
+        let sql = r#"
+/* foo */ REPLACE INTO
+    cities(name,population)
+SELECT
+    name,
+    population
+FROM
+   cities
+WHERE id = 1
+;"#;
+
+        let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap();
+        assert_eq!(1, r.len());
+
+        dbg!(&r[0]);
+
+        let stmt_span = r[0].span();
+
+        assert_eq!(stmt_span.start, (2, 11).into());
+        assert_eq!(stmt_span.end, (9, 13).into());
+    }
+
+    #[test]
+    fn test_delete_statement_span() {
+        let sql = r#"-- foo
+      DELETE /* quux */
+        FROM foo
+       WHERE foo.x = 42
+;"#;
+
+        let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap();
+        assert_eq!(1, r.len());
+
+        let stmt_span = r[0].span();
+
+        assert_eq!(stmt_span.start, (2, 7).into());
+        assert_eq!(stmt_span.end, (4, 24).into());
+    }
 }
diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs
index 64a8d532..ba4cb617 100644
--- a/src/dialect/sqlite.rs
+++ b/src/dialect/sqlite.rs
@@ -68,7 +68,7 @@ impl Dialect for SQLiteDialect {
     fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
         if parser.parse_keyword(Keyword::REPLACE) {
             parser.prev_token();
-            Some(parser.parse_insert())
+            Some(parser.parse_insert(parser.get_current_token().clone()))
         } else {
             None
         }
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 2744a967..0b2158e6 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -586,11 +586,11 @@ impl<'a> Parser<'a> {
                 Keyword::DISCARD => self.parse_discard(),
                 Keyword::DECLARE => self.parse_declare(),
                 Keyword::FETCH => self.parse_fetch_statement(),
-                Keyword::DELETE => self.parse_delete(),
-                Keyword::INSERT => self.parse_insert(),
-                Keyword::REPLACE => self.parse_replace(),
+                Keyword::DELETE => self.parse_delete(next_token),
+                Keyword::INSERT => self.parse_insert(next_token),
+                Keyword::REPLACE => self.parse_replace(next_token),
                 Keyword::UNCACHE => self.parse_uncache_table(),
-                Keyword::UPDATE => self.parse_update(),
+                Keyword::UPDATE => self.parse_update(next_token),
                 Keyword::ALTER => self.parse_alter(),
                 Keyword::CALL => self.parse_call(),
                 Keyword::COPY => self.parse_copy(),
@@ -11817,8 +11817,11 @@ impl<'a> Parser<'a> {
     /// Parse a DELETE statement, returning a `Box`ed SetExpr
     ///
     /// This is used to reduce the size of the stack frames in debug builds
-    fn parse_delete_setexpr_boxed(&mut self) -> Result<Box<SetExpr>, ParserError> {
-        Ok(Box::new(SetExpr::Delete(self.parse_delete()?)))
+    fn parse_delete_setexpr_boxed(
+        &mut self,
+        delete_token: TokenWithSpan,
+    ) -> Result<Box<SetExpr>, ParserError> {
+        Ok(Box::new(SetExpr::Delete(self.parse_delete(delete_token)?)))
     }
 
     /// Parse a MERGE statement, returning a `Box`ed SetExpr
@@ -11828,7 +11831,7 @@ impl<'a> Parser<'a> {
         Ok(Box::new(SetExpr::Merge(self.parse_merge()?)))
     }
 
-    pub fn parse_delete(&mut self) -> Result<Statement, ParserError> {
+    pub fn parse_delete(&mut self, delete_token: TokenWithSpan) -> Result<Statement, ParserError> {
         let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) {
             // `FROM` keyword is optional in BigQuery SQL.
             // https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#delete_statement
@@ -11871,6 +11874,7 @@ impl<'a> Parser<'a> {
         };
 
         Ok(Statement::Delete(Delete {
+            delete_token: delete_token.into(),
             tables,
             from: if with_from_keyword {
                 FromTable::WithFromKeyword(from)
@@ -12000,7 +12004,7 @@ impl<'a> Parser<'a> {
         if self.parse_keyword(Keyword::INSERT) {
             Ok(Query {
                 with,
-                body: self.parse_insert_setexpr_boxed()?,
+                body: self.parse_insert_setexpr_boxed(self.get_current_token().clone())?,
                 order_by: None,
                 limit_clause: None,
                 fetch: None,
@@ -12014,7 +12018,7 @@ impl<'a> Parser<'a> {
         } else if self.parse_keyword(Keyword::UPDATE) {
             Ok(Query {
                 with,
-                body: self.parse_update_setexpr_boxed()?,
+                body: self.parse_update_setexpr_boxed(self.get_current_token().clone())?,
                 order_by: None,
                 limit_clause: None,
                 fetch: None,
@@ -12028,7 +12032,7 @@ impl<'a> Parser<'a> {
         } else if self.parse_keyword(Keyword::DELETE) {
             Ok(Query {
                 with,
-                body: self.parse_delete_setexpr_boxed()?,
+                body: self.parse_delete_setexpr_boxed(self.get_current_token().clone())?,
                 limit_clause: None,
                 order_by: None,
                 fetch: None,
@@ -15470,7 +15474,10 @@ impl<'a> Parser<'a> {
     }
 
     /// Parse an REPLACE statement
-    pub fn parse_replace(&mut self) -> Result<Statement, ParserError> {
+    pub fn parse_replace(
+        &mut self,
+        replace_token: TokenWithSpan,
+    ) -> Result<Statement, ParserError> {
         if !dialect_of!(self is MySqlDialect | GenericDialect) {
             return parser_err!(
                 "Unsupported statement REPLACE",
@@ -15478,7 +15485,7 @@ impl<'a> Parser<'a> {
             );
         }
 
-        let mut insert = self.parse_insert()?;
+        let mut insert = self.parse_insert(replace_token)?;
         if let Statement::Insert(Insert { replace_into, .. }) = &mut insert {
             *replace_into = true;
         }
@@ -15489,12 +15496,15 @@ impl<'a> Parser<'a> {
     /// Parse an INSERT statement, returning a `Box`ed SetExpr
     ///
     /// This is used to reduce the size of the stack frames in debug builds
-    fn parse_insert_setexpr_boxed(&mut self) -> Result<Box<SetExpr>, ParserError> {
-        Ok(Box::new(SetExpr::Insert(self.parse_insert()?)))
+    fn parse_insert_setexpr_boxed(
+        &mut self,
+        insert_token: TokenWithSpan,
+    ) -> Result<Box<SetExpr>, ParserError> {
+        Ok(Box::new(SetExpr::Insert(self.parse_insert(insert_token)?)))
     }
 
     /// Parse an INSERT statement
-    pub fn parse_insert(&mut self) -> Result<Statement, ParserError> {
+    pub fn parse_insert(&mut self, insert_token: TokenWithSpan) -> Result<Statement, ParserError> {
         let or = self.parse_conflict_clause();
         let priority = if !dialect_of!(self is MySqlDialect | GenericDialect) {
             None
@@ -15663,6 +15673,7 @@ impl<'a> Parser<'a> {
             };
 
             Ok(Statement::Insert(Insert {
+                insert_token: insert_token.into(),
                 or,
                 table: table_object,
                 table_alias,
@@ -15754,11 +15765,14 @@ impl<'a> Parser<'a> {
     /// Parse an UPDATE statement, returning a `Box`ed SetExpr
     ///
     /// This is used to reduce the size of the stack frames in debug builds
-    fn parse_update_setexpr_boxed(&mut self) -> Result<Box<SetExpr>, ParserError> {
-        Ok(Box::new(SetExpr::Update(self.parse_update()?)))
+    fn parse_update_setexpr_boxed(
+        &mut self,
+        update_token: TokenWithSpan,
+    ) -> Result<Box<SetExpr>, ParserError> {
+        Ok(Box::new(SetExpr::Update(self.parse_update(update_token)?)))
     }
 
-    pub fn parse_update(&mut self) -> Result<Statement, ParserError> {
+    pub fn parse_update(&mut self, update_token: TokenWithSpan) -> Result<Statement, ParserError> {
         let or = self.parse_conflict_clause();
         let table = self.parse_table_and_joins()?;
         let from_before_set = if self.parse_keyword(Keyword::FROM) {
@@ -15793,6 +15807,7 @@ impl<'a> Parser<'a> {
             None
         };
         Ok(Update {
+            update_token: update_token.into(),
             table,
             assignments,
             from,
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index b360f751..b06f1141 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -456,6 +456,7 @@ fn parse_update_set_from() {
     assert_eq!(
         stmt,
         Statement::Update(Update {
+            update_token: AttachedToken::empty(),
             table: TableWithJoins {
                 relation: table_from_name(ObjectName::from(vec![Ident::new("t1")])),
                 joins: vec![],
@@ -551,6 +552,7 @@ fn parse_update_with_table_alias() {
             returning,
             or: None,
             limit: None,
+            update_token: _,
         }) => {
             assert_eq!(
                 TableWithJoins {
diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs
index b31a5b7c..bc5d48ba 100644
--- a/tests/sqlparser_mysql.rs
+++ b/tests/sqlparser_mysql.rs
@@ -2632,6 +2632,7 @@ fn parse_update_with_joins() {
             returning,
             or: None,
             limit: None,
+            update_token: _,
         }) => {
             assert_eq!(
                 TableWithJoins {
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index 4edab706..75d567c1 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -5140,6 +5140,7 @@ fn test_simple_postgres_insert_with_alias() {
     assert_eq!(
         statement,
         Statement::Insert(Insert {
+            insert_token: AttachedToken::empty(),
             or: None,
             ignore: false,
             into: true,
@@ -5210,6 +5211,7 @@ fn test_simple_postgres_insert_with_alias() {
     assert_eq!(
         statement,
         Statement::Insert(Insert {
+            insert_token: AttachedToken::empty(),
             or: None,
             ignore: false,
             into: true,
@@ -5282,6 +5284,7 @@ fn test_simple_insert_with_quoted_alias() {
     assert_eq!(
         statement,
         Statement::Insert(Insert {
+            insert_token: AttachedToken::empty(),
             or: None,
             ignore: false,
             into: true,
diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs
index f1f6cf49..321cfef0 100644
--- a/tests/sqlparser_sqlite.rs
+++ b/tests/sqlparser_sqlite.rs
@@ -22,6 +22,7 @@
 #[macro_use]
 mod test_utils;
 
+use sqlparser::ast::helpers::attached_token::AttachedToken;
 use sqlparser::keywords::Keyword;
 use test_utils::*;
 
@@ -494,7 +495,8 @@ fn parse_update_tuple_row_values() {
             },
             from: None,
             returning: None,
-            limit: None
+            limit: None,
+            update_token: AttachedToken::empty()
         })
     );
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to