This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 1ec65a4a4a Further clarification of the supports_filters_pushdown
documentation (#9988)
1ec65a4a4a is described below
commit 1ec65a4a4a697d382d64ac2382b8486709dcf680
Author: D.B. Schwartz <[email protected]>
AuthorDate: Wed Apr 10 10:00:47 2024 -0600
Further clarification of the supports_filters_pushdown documentation (#9988)
* Further refinement of the comment
* Add code example of how to support a filter
* Update supports_filters_pushdown example so that it compiles
* Add comments to example code in supports_filters_pushdown doc
* Change example to use functional style
* Fixed several issues with the supports_filters_pushdown doc; still need
all required TableProvider impl fns
* cargo fmt
* Update example so it compiles and add headings
* clean
* remove to_string()
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/core/src/datasource/provider.rs | 77 +++++++++++++++++++++++++++---
1 file changed, 70 insertions(+), 7 deletions(-)
diff --git a/datafusion/core/src/datasource/provider.rs
b/datafusion/core/src/datasource/provider.rs
index 9aac072ed4..100011952b 100644
--- a/datafusion/core/src/datasource/provider.rs
+++ b/datafusion/core/src/datasource/provider.rs
@@ -161,20 +161,83 @@ pub trait TableProvider: Sync + Send {
/// Specify if DataFusion should provide filter expressions to the
/// TableProvider to apply *during* the scan.
///
- /// The return value must have one element for each filter expression
passed
- /// in. The value of each element indicates if the TableProvider can apply
- /// that particular filter during the scan.
- ///
/// Some TableProviders can evaluate filters more efficiently than the
/// `Filter` operator in DataFusion, for example by using an index.
///
- /// By default, returns [`Unsupported`] for all filters, meaning no filters
- /// will be provided to [`Self::scan`]. If the TableProvider can implement
- /// filter pushdown, it should return either [`Exact`] or [`Inexact`].
+ /// # Parameters and Return Value
+ ///
+ /// The returned `Vec` must have one element for each element of the `filters`
+ /// argument. The value of each element indicates whether the TableProvider can
+ /// apply the corresponding filter during the scan. The position in the return
+ /// value corresponds to the position of the expression in the `filters` parameter.
+ ///
+ /// If the length of the resulting `Vec` does not match the length of the
+ /// `filters` input, an error will be returned.
+ ///
+ /// Each element in the resulting `Vec` is one of the following:
+ /// * [`Exact`] or [`Inexact`]: The TableProvider can apply the filter
+ /// during the scan
+ /// * [`Unsupported`]: The TableProvider cannot apply the filter during the scan
+ ///
+ /// By default, this function returns [`Unsupported`] for all filters,
+ /// meaning no filters will be provided to [`Self::scan`].
///
/// [`Unsupported`]: TableProviderFilterPushDown::Unsupported
/// [`Exact`]: TableProviderFilterPushDown::Exact
/// [`Inexact`]: TableProviderFilterPushDown::Inexact
+ /// # Example
+ ///
+ /// ```rust
+ /// # use std::any::Any;
+ /// # use std::sync::Arc;
+ /// # use arrow_schema::SchemaRef;
+ /// # use async_trait::async_trait;
+ /// # use datafusion::datasource::TableProvider;
+ /// # use datafusion::error::{Result, DataFusionError};
+ /// # use datafusion::execution::context::SessionState;
+ /// # use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType};
+ /// # use datafusion_physical_plan::ExecutionPlan;
+ /// // Define a struct that implements the TableProvider trait
+ /// struct TestDataSource {}
+ ///
+ /// #[async_trait]
+ /// impl TableProvider for TestDataSource {
+ /// # fn as_any(&self) -> &dyn Any { todo!() }
+ /// # fn schema(&self) -> SchemaRef { todo!() }
+ /// # fn table_type(&self) -> TableType { todo!() }
+ /// # async fn scan(&self, s: &SessionState, p: Option<&Vec<usize>>, f:
&[Expr], l: Option<usize>) -> Result<Arc<dyn ExecutionPlan>> {
+ /// todo!()
+ /// # }
+ /// // Override supports_filters_pushdown to decide which expressions
+ /// // to accept as pushdown predicates.
+ /// fn supports_filters_pushdown(&self, filters: &[&Expr]) ->
Result<Vec<TableProviderFilterPushDown>> {
+ /// // Process each filter
+ /// let support: Vec<_> = filters.iter().map(|expr| {
+ /// match expr {
+ /// // This example only supports a BETWEEN expression on a single column named "c1".
+ /// Expr::Between(between_expr) => {
+ /// between_expr.expr
+ /// .try_into_col()
+ /// .map(|column| {
+ /// if column.name == "c1" {
+ /// TableProviderFilterPushDown::Exact
+ /// } else {
+ /// TableProviderFilterPushDown::Unsupported
+ /// }
+ /// })
+ /// // If there is no column in the expr, set the filter to unsupported.
+ /// .unwrap_or(TableProviderFilterPushDown::Unsupported)
+ /// }
+ /// _ => {
+ /// // For all other cases return Unsupported.
+ /// TableProviderFilterPushDown::Unsupported
+ /// }
+ /// }
+ /// }).collect();
+ /// Ok(support)
+ /// }
+ /// }
+ /// ```
fn supports_filters_pushdown(
&self,
filters: &[&Expr],