This is an automated email from the ASF dual-hosted git repository.

iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new a8bde39e Add support for `TABLESAMPLE` pipe operator (#1860)
a8bde39e is described below

commit a8bde39efb4c3568fb3dc685b440962d50403fc3
Author: Hendrik Makait <hend...@makait.com>
AuthorDate: Fri May 30 09:14:36 2025 +0200

    Add support for `TABLESAMPLE` pipe operator (#1860)
---
 src/ast/query.rs          | 14 +++++++++++---
 src/parser/mod.rs         | 15 +++++++++++++--
 tests/sqlparser_common.rs |  5 +++++
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/src/ast/query.rs b/src/ast/query.rs
index 5b784b19..ffe1e402 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -1559,7 +1559,7 @@ impl fmt::Display for TableSampleBucket {
 }
 impl fmt::Display for TableSample {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, " {}", self.modifier)?;
+        write!(f, "{}", self.modifier)?;
         if let Some(name) = &self.name {
             write!(f, " {}", name)?;
         }
@@ -1862,7 +1862,7 @@ impl fmt::Display for TableFactor {
                     write!(f, " WITH ORDINALITY")?;
                 }
                 if let Some(TableSampleKind::BeforeTableAlias(sample)) = sample {
-                    write!(f, "{sample}")?;
+                    write!(f, " {sample}")?;
                 }
                 if let Some(alias) = alias {
                     write!(f, " AS {alias}")?;
@@ -1877,7 +1877,7 @@ impl fmt::Display for TableFactor {
                     write!(f, "{version}")?;
                 }
                 if let Some(TableSampleKind::AfterTableAlias(sample)) = sample {
-                    write!(f, "{sample}")?;
+                    write!(f, " {sample}")?;
                 }
                 Ok(())
             }
@@ -2680,6 +2680,10 @@ pub enum PipeOperator {
         full_table_exprs: Vec<ExprWithAliasAndOrderBy>,
         group_by_expr: Vec<ExprWithAliasAndOrderBy>,
     },
+    /// Selects a random sample of rows from the input table.
+    /// Syntax: `|> TABLESAMPLE SYSTEM (10 PERCENT)`
+    /// See more at <https://cloud.google.com/bigquery/docs/reference/standard-sql/pipe-syntax#tablesample_pipe_operator>
+    TableSample { sample: Box<TableSample> },
 }
 
 impl fmt::Display for PipeOperator {
@@ -2731,6 +2735,10 @@ impl fmt::Display for PipeOperator {
             PipeOperator::OrderBy { exprs } => {
                 write!(f, "ORDER BY {}", display_comma_separated(exprs.as_slice()))
             }
+
+            PipeOperator::TableSample { sample } => {
+                write!(f, "{}", sample)
+            }
         }
     }
 }
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index c1be8764..6d642384 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -11054,6 +11054,7 @@ impl<'a> Parser<'a> {
                 Keyword::LIMIT,
                 Keyword::AGGREGATE,
                 Keyword::ORDER,
+                Keyword::TABLESAMPLE,
             ])?;
             match kw {
                 Keyword::SELECT => {
@@ -11116,6 +11117,10 @@ impl<'a> Parser<'a> {
                     let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?;
                     pipe_operators.push(PipeOperator::OrderBy { exprs })
                 }
+                Keyword::TABLESAMPLE => {
+                    let sample = self.parse_table_sample(TableSampleModifier::TableSample)?;
+                    pipe_operators.push(PipeOperator::TableSample { sample });
+                }
                 unhandled => {
                     return Err(ParserError::ParserError(format!(
                     "`expect_one_of_keywords` further up allowed unhandled keyword: {unhandled:?}"
@@ -12760,7 +12765,13 @@ impl<'a> Parser<'a> {
         } else {
             return Ok(None);
         };
+        self.parse_table_sample(modifier).map(Some)
+    }
 
+    fn parse_table_sample(
+        &mut self,
+        modifier: TableSampleModifier,
+    ) -> Result<Box<TableSample>, ParserError> {
         let name = match self.parse_one_of_keywords(&[
             Keyword::BERNOULLI,
             Keyword::ROW,
@@ -12842,14 +12853,14 @@ impl<'a> Parser<'a> {
             None
         };
 
-        Ok(Some(Box::new(TableSample {
+        Ok(Box::new(TableSample {
             modifier,
             name,
             quantity,
             seed,
             bucket,
             offset,
-        })))
+        }))
     }
 
     fn parse_table_sample_seed(
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index d02d7d83..1cc79317 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -15156,6 +15156,11 @@ fn parse_pipeline_operator() {
     dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC");
     dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC");
 
+    // tablesample pipe operator
+    dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE BERNOULLI (50)");
+    dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50 PERCENT)");
+    dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)");
+
     // many pipes
     dialects.verified_stmt(
         "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC",


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to