Re: [PR] Support some of pipe operators [datafusion-sqlparser-rs]

via GitHub Wed, 12 Mar 2025 12:48:26 -0700


iffyio commented on code in PR #1759:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/1759#discussion_r1992171898



##########
src/ast/query.rs:
##########
@@ -2407,6 +2413,98 @@ impl fmt::Display for OffsetRows {
     }
 }
 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum PipeOperator {
+    Limit {
+        expr: Expr,
+        offset: Option<Expr>,
+    },
+    Where {
+        expr: Expr,
+    },
+    OrderBy {
+        exprs: Vec<OrderByExpr>,
+    },
+    Select {
+        exprs: Vec<SelectItem>,
+    },
+    Extend {

Review Comment:
   it would be nice to include a short example doc string for the variants and 
where possible links to the documentation, like here for extended for example 
https://cloud.google.com/bigquery/docs/pipe-syntax-guide#extend-pipe-operator



##########
src/ast/query.rs:
##########
@@ -2407,6 +2413,98 @@ impl fmt::Display for OffsetRows {
     }
 }
 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum PipeOperator {

Review Comment:
   Can we add a description and link to the docs 
https://cloud.google.com/bigquery/docs/pipe-syntax-guide#basic_syntax



##########
src/parser/mod.rs:
##########
@@ -10319,6 +10321,104 @@ impl<'a> Parser<'a> {
                 None
             };
 
+            let mut pipe_operators = Vec::new();
+
+            // Syntax from "SQL Has Problems. We Can Fix Them: Pipe Syntax In 
SQL"
+            // 
https://storage.googleapis.com/gweb-research2023-media/pubtools/1004848.pdf
+            while self.consume_token(&Token::VerticalBarRightAngleBracket) {

Review Comment:
   can we move the logic to a dedicated function like `parse_pipe_operators()`?
   



##########
tests/sqlparser_common.rs:
##########
@@ -14654,3 +14662,71 @@ fn parse_set_names() {
     dialects.verified_stmt("SET NAMES 'utf8'");
     dialects.verified_stmt("SET NAMES UTF8 COLLATE bogus");
 }
+
+#[test]
+fn parse_pipeline_operator() {
+    let dialects = all_dialects_where(|d| d.supports_pipe_operator());
+
+    // select pipe operator
+    dialects.verified_stmt("SELECT * FROM users |> SELECT id");
+    dialects.verified_stmt("SELECT * FROM users |> SELECT id, name");
+    dialects.verified_query_with_canonical(
+        "SELECT * FROM users |> SELECT id user_id",
+        "SELECT * FROM users |> SELECT id AS user_id",
+    );
+    dialects.verified_stmt("SELECT * FROM users |> SELECT id AS user_id");
+
+    // extend pipe operator
+    dialects.verified_stmt("SELECT * FROM users |> EXTEND id + 1 AS new_id");
+    dialects.verified_stmt("SELECT * FROM users |> EXTEND id AS new_id, name 
AS new_name");
+    dialects.verified_query_with_canonical(
+        "SELECT * FROM users |> EXTEND id user_id",
+        "SELECT * FROM users |> EXTEND id AS user_id",
+    );
+
+    // set pipe operator
+    dialects.verified_stmt("SELECT * FROM users |> SET id = id + 1");
+    dialects.verified_stmt("SELECT * FROM users |> SET id = id + 1, name = 
name + ' Doe'");
+
+    // drop pipe operator
+    dialects.verified_stmt("SELECT * FROM users |> DROP id");
+    dialects.verified_stmt("SELECT * FROM users |> DROP id, name");
+
+    // as pipe operator
+    dialects.verified_stmt("SELECT * FROM users |> AS new_users");
+
+    // limit pipe operator
+    dialects.verified_stmt("SELECT * FROM users |> LIMIT 10");
+    dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 OFFSET 5");
+    dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 |> LIMIT 5");
+    dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 |> WHERE true");
+
+    // where pipe operator
+    dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1");
+    dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1 AND name = 
'John'");
+    dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1 OR name = 
'John'");
+
+    // aggregate pipe operator full table
+    dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*)");
+    dialects.verified_query_with_canonical(
+        "SELECT * FROM users |> AGGREGATE COUNT(*) total_users",
+        "SELECT * FROM users |> AGGREGATE COUNT(*) AS total_users",
+    );
+    dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*) AS 
total_users");
+    dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*), 
MIN(id)");
+
+    // aggregate pipe opeprator with grouping
+    dialects.verified_stmt(
+        "SELECT * FROM users |> AGGREGATE SUM(o_totalprice) AS price, COUNT(*) 
AS cnt GROUP BY EXTRACT(YEAR FROM o_orderdate) AS year",
+    );
+    dialects.verified_stmt(
+        "SELECT * FROM users |> AGGREGATE GROUP BY EXTRACT(YEAR FROM 
o_orderdate) AS year",
+    );
+    dialects
+        .verified_stmt("SELECT * FROM users |> AGGREGATE GROUP BY EXTRACT(YEAR 
FROM o_orderdate)");
+
+    // order by pipe operator
+    dialects.verified_stmt("SELECT * FROM users |> ORDER BY id ASC");
+    dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC");
+    dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name 
ASC");
+}

Review Comment:
   can we include a test case with more than one pipe operator? e.g. `SELECT * 
FROM T |> a |> b |> c`



##########
src/parser/mod.rs:
##########
@@ -10319,6 +10321,104 @@ impl<'a> Parser<'a> {
                 None
             };
 
+            let mut pipe_operators = Vec::new();
+
+            // Syntax from "SQL Has Problems. We Can Fix Them: Pipe Syntax In 
SQL"
+            // 
https://storage.googleapis.com/gweb-research2023-media/pubtools/1004848.pdf

Review Comment:
   ```suggestion
   ```



##########
src/parser/mod.rs:
##########
@@ -10319,6 +10321,104 @@ impl<'a> Parser<'a> {
                 None
             };
 
+            let mut pipe_operators = Vec::new();
+
+            // Syntax from "SQL Has Problems. We Can Fix Them: Pipe Syntax In 
SQL"
+            // 
https://storage.googleapis.com/gweb-research2023-media/pubtools/1004848.pdf
+            while self.consume_token(&Token::VerticalBarRightAngleBracket) {
+                let kw = self.expect_one_of_keywords(&[
+                    Keyword::SELECT,
+                    Keyword::EXTEND,
+                    Keyword::SET,
+                    Keyword::DROP,
+                    Keyword::AS,
+                    Keyword::WHERE,
+                    Keyword::LIMIT,
+                    Keyword::AGGREGATE,
+                    Keyword::ORDER,
+                ])?;
+                match kw {
+                    // SELECT <expr> [[AS] alias], ...
+                    Keyword::SELECT => {
+                        let exprs = 
self.parse_comma_separated(Parser::parse_select_item)?;
+                        pipe_operators.push(PipeOperator::Select { exprs })
+                    }
+                    // EXTEND <expr> [[AS] alias], ...
+                    Keyword::EXTEND => {
+                        let exprs = 
self.parse_comma_separated(Parser::parse_select_item)?;
+                        pipe_operators.push(PipeOperator::Extend { exprs })
+                    }
+                    // SET <column> = <expression>, ...
+                    Keyword::SET => {
+                        let assignments = 
self.parse_comma_separated(Parser::parse_assignment)?;
+                        pipe_operators.push(PipeOperator::Set { assignments })
+                    }
+                    // DROP <column>, ...
+                    Keyword::DROP => {
+                        let columns = self.parse_identifiers()?;
+                        pipe_operators.push(PipeOperator::Drop { columns })
+                    }
+                    // AS <alias>
+                    Keyword::AS => {
+                        let alias = self.parse_identifier()?;
+                        pipe_operators.push(PipeOperator::Alias { alias })
+                    }
+                    // WHERE <condition>
+                    Keyword::WHERE => {
+                        let expr = self.parse_expr()?;
+                        pipe_operators.push(PipeOperator::Where { expr })
+                    }
+                    // LIMIT <n> [OFFSET <m>]
+                    Keyword::LIMIT => {
+                        let expr = self.parse_expr()?;
+                        let offset = if self.parse_keyword(Keyword::OFFSET) {
+                            Some(self.parse_expr()?)
+                        } else {
+                            None
+                        };
+                        pipe_operators.push(PipeOperator::Limit { expr, offset 
})
+                    }
+                    // AGGREGATE <agg_expr> [[AS] alias], ...
+                    //
+                    // and
+                    //
+                    // AGGREGATE [<agg_expr> [[AS] alias], ...]
+                    // GROUP BY <grouping_expr> [AS alias], ...
+                    Keyword::AGGREGATE => {
+                        let full_table_exprs = self.parse_comma_separated0(
+                            |parser| {
+                                let expr = parser.parse_expr()?;
+                                let alias = 
parser.maybe_parse_select_item_alias()?;
+                                Ok(ExprWithAlias { expr, alias })
+                            },
+                            Token::make_keyword(keywords::GROUP),
+                        )?;
+
+                        let group_by_exprs = if 
self.parse_keywords(&[Keyword::GROUP, Keyword::BY])
+                        {
+                            self.parse_comma_separated(|parser| {
+                                let expr = parser.parse_expr()?;
+                                let alias = 
parser.maybe_parse_select_item_alias()?;
+                                Ok(ExprWithAlias { expr, alias })
+                            })?
+                        } else {
+                            vec![]
+                        };
+                        pipe_operators.push(PipeOperator::Aggregate {
+                            full_table_exprs,
+                            group_by_exprs,
+                        })
+                    }
+                    // ORDER BY <expr> [ASC|DESC], ...
+                    Keyword::ORDER => {
+                        self.expect_one_of_keywords(&[Keyword::BY])?;
+                        let exprs = 
self.parse_comma_separated(Parser::parse_order_by_expr)?;
+                        pipe_operators.push(PipeOperator::OrderBy { exprs })
+                    }
+                    _ => {}

Review Comment:
   This clause look unhandled, for the loop i imagine instead we can something 
like?
   ```rust
   while self.consume_token(&Token::VerticalBarRightAngleBracket) {
       operators.push(self.parse_pipe_operator()?);
   }
   fn parse_pipe_operator() {
       if self.parse_keyword(SELECT) {
           // select operator
       } // ...
       else {
          self.expected("...")
       }
   }
   ```



##########
src/parser/mod.rs:
##########
@@ -10319,6 +10321,104 @@ impl<'a> Parser<'a> {
                 None
             };
 
+            let mut pipe_operators = Vec::new();
+
+            // Syntax from "SQL Has Problems. We Can Fix Them: Pipe Syntax In 
SQL"
+            // 
https://storage.googleapis.com/gweb-research2023-media/pubtools/1004848.pdf
+            while self.consume_token(&Token::VerticalBarRightAngleBracket) {
+                let kw = self.expect_one_of_keywords(&[
+                    Keyword::SELECT,
+                    Keyword::EXTEND,
+                    Keyword::SET,
+                    Keyword::DROP,
+                    Keyword::AS,
+                    Keyword::WHERE,
+                    Keyword::LIMIT,
+                    Keyword::AGGREGATE,
+                    Keyword::ORDER,
+                ])?;
+                match kw {
+                    // SELECT <expr> [[AS] alias], ...
+                    Keyword::SELECT => {
+                        let exprs = 
self.parse_comma_separated(Parser::parse_select_item)?;
+                        pipe_operators.push(PipeOperator::Select { exprs })
+                    }
+                    // EXTEND <expr> [[AS] alias], ...
+                    Keyword::EXTEND => {
+                        let exprs = 
self.parse_comma_separated(Parser::parse_select_item)?;
+                        pipe_operators.push(PipeOperator::Extend { exprs })
+                    }
+                    // SET <column> = <expression>, ...
+                    Keyword::SET => {
+                        let assignments = 
self.parse_comma_separated(Parser::parse_assignment)?;
+                        pipe_operators.push(PipeOperator::Set { assignments })
+                    }
+                    // DROP <column>, ...
+                    Keyword::DROP => {
+                        let columns = self.parse_identifiers()?;
+                        pipe_operators.push(PipeOperator::Drop { columns })
+                    }
+                    // AS <alias>
+                    Keyword::AS => {
+                        let alias = self.parse_identifier()?;
+                        pipe_operators.push(PipeOperator::Alias { alias })
+                    }
+                    // WHERE <condition>
+                    Keyword::WHERE => {
+                        let expr = self.parse_expr()?;
+                        pipe_operators.push(PipeOperator::Where { expr })
+                    }
+                    // LIMIT <n> [OFFSET <m>]
+                    Keyword::LIMIT => {
+                        let expr = self.parse_expr()?;
+                        let offset = if self.parse_keyword(Keyword::OFFSET) {
+                            Some(self.parse_expr()?)
+                        } else {
+                            None
+                        };
+                        pipe_operators.push(PipeOperator::Limit { expr, offset 
})
+                    }
+                    // AGGREGATE <agg_expr> [[AS] alias], ...
+                    //
+                    // and
+                    //
+                    // AGGREGATE [<agg_expr> [[AS] alias], ...]
+                    // GROUP BY <grouping_expr> [AS alias], ...

Review Comment:
   Ah so I think these comments would be better placed on in the doc strings 
for each variant (left similar comment at that site)



##########
src/dialect/mod.rs:
##########
@@ -508,6 +508,20 @@ pub trait Dialect: Debug + Any {
         false
     }
 
+    /// Return true if the dialect supports pipe operator.
+    ///
+    /// Example:
+    /// ```sql
+    /// SELECT *
+    /// FROM table
+    /// |> limit 1
+    /// ```
+    ///
+    /// See "SQL Has Problems. We Can Fix Them: Pipe Syntax In SQL" 
https://research.google/pubs/sql-has-problems-we-can-fix-them-pipe-syntax-in-sql/

Review Comment:
   I think we can link to a concrete syntax like this instead
   https://cloud.google.com/bigquery/docs/pipe-syntax-guide#basic_syntax
   
   The paper wouldn't really help as much in the context of someone working on 
the codebase or trying to use the library



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Re: [PR] Support some of pipe operators [datafusion-sqlparser-rs]

Reply via email to