zeodtr commented on issue #5157:
URL: 
https://github.com/apache/arrow-datafusion/issues/5157#issuecomment-1414719714

   @alamb 
   I've simplified the benchmark program as follows. (I would like to upload 
the file as an attachment, but it's impossible in my current environment) It is 
a single file, named `main.rs`. A lot of it is based on 
`datafusion/sql/examples/sql.rs` except for the optimizer part. The optimizer 
part is based on https://crates.io/crates/datafusion-optimizer.
   
   ```rust
   use std::{collections::HashMap, sync::Arc};
   
   use datafusion::{
       arrow::datatypes::{DataType, Field, Schema},
       common::Result,
       config::ConfigOptions,
       error::DataFusionError,
       logical_expr::{
           logical_plan::builder::LogicalTableSource, AggregateUDF, 
LogicalPlan, ScalarUDF,
           TableSource,
       },
       optimizer::{optimizer::Optimizer, OptimizerContext, OptimizerRule},
       sql::{
           planner::{ContextProvider, SqlToRel},
           sqlparser::{dialect::GenericDialect, parser::Parser},
           TableReference,
       },
   };
   
   #[global_allocator]
   static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
   
   /// Benchmark entry point.
   ///
   /// Parses and plans a single-column `select` against the table registered by
   /// `TestSchemaProvider::new`, runs the default optimizer over the plan, and
   /// prints the cumulative elapsed milliseconds after each phase followed by
   /// both plans.
   fn main() {
       let schema_provider = TestSchemaProvider::new();
       let sql = "select column1 from table1";
   
       // The clock starts only after the schema provider exists, so building
       // the table schema is not part of the measurement.
       let timer = std::time::Instant::now();
   
       let statements = Parser::parse_sql(&GenericDialect {}, sql).unwrap();
       let planner = SqlToRel::new(&schema_provider);
       let plan = planner
           .sql_statement_to_plan(statements[0].clone())
           .unwrap();
   
       let planning_ms = timer.elapsed().as_millis();
       println!("elapsed time after creating a logical plan: {}", planning_ms);
   
       let optimizer_config = OptimizerContext::default();
       let rule_runner = Optimizer::new();
       let optimized_plan = rule_runner
           .optimize(&plan, &optimizer_config, observe)
           .unwrap();
   
       // Same timer as above: this figure includes the planning time.
       let total_ms = timer.elapsed().as_millis();
       println!("elapsed time after optimization: {}\n", total_ms);
   
       println!("plan:\n{:?}\n", plan);
       println!("optimized plan:\n{:?}", optimized_plan);
   }
   
   /// Observer callback handed to `Optimizer::optimize` in `main`; it
   /// deliberately ignores both arguments and does nothing.
   fn observe(_plan: &LogicalPlan, _rule: &dyn OptimizerRule) {}
   
   /// Minimal in-memory `ContextProvider` used by the benchmark: a fixed set of
   /// named table sources plus a configuration object.
   struct TestSchemaProvider {
       /// Configuration returned by `ContextProvider::options`.
       options: ConfigOptions,
       /// Registered table sources, keyed by table name.
       tables: HashMap<String, Arc<dyn TableSource>>,
   }
   
   impl TestSchemaProvider {
       /// Builds a provider holding one table, `table1`, made of 700
       /// non-nullable `Int32` columns named `column1` through `column700`,
       /// with default configuration options.
       pub fn new() -> Self {
           let wide_table = create_table_source(
               (1..=700)
                   .map(|idx| Field::new(format!("column{}", idx), DataType::Int32, false))
                   .collect(),
           );
   
           let mut tables = HashMap::new();
           tables.insert("table1".to_string(), wide_table);
   
           Self {
               options: Default::default(),
               tables,
           }
       }
   }
   
   /// Wraps `fields` in a schema with empty metadata and returns it as a
   /// shared `LogicalTableSource`.
   fn create_table_source(fields: Vec<Field>) -> Arc<dyn TableSource> {
       let schema = Schema::new_with_metadata(fields, HashMap::new());
       let source = LogicalTableSource::new(Arc::new(schema));
       Arc::new(source)
   }
   
   /// Planner-facing view of the provider: tables are resolved by their bare
   /// table name, and no functions, aggregates or variables are available.
   impl ContextProvider for TestSchemaProvider {
       /// Returns the registered source for `name.table()`, or a
       /// `DataFusionError::Plan` error when no such table is registered.
       fn get_table_provider(&self, name: TableReference) -> Result<Arc<dyn TableSource>> {
           self.tables.get(name.table()).cloned().ok_or_else(|| {
               DataFusionError::Plan(format!("Table not found: {}", name.table()))
           })
       }
   
       /// No scalar UDFs are registered.
       fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> {
           None
       }
   
       /// No aggregate UDFs are registered.
       fn get_aggregate_meta(&self, _name: &str) -> Option<Arc<AggregateUDF>> {
           None
       }
   
       /// No variables are defined, so no type is ever reported.
       fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> {
           None
       }
   
       /// Exposes the configuration stored on the provider.
       fn options(&self) -> &ConfigOptions {
           &self.options
       }
   }
   ```
   
   And the content of `Cargo.toml` is as follows:
   
   ```toml
   [package]
   name = "simple_optimizer_test"
   version = "0.1.0"
   edition = "2021"
   
   # See more keys and their definitions at 
https://doc.rust-lang.org/cargo/reference/manifest.html
   
   [dependencies]
   datafusion = "17.0.0"
   mimalloc = "0.1.34"
   
   [profile.release]
   debug = 2
   ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to