zeodtr commented on issue #5157: URL: https://github.com/apache/arrow-datafusion/issues/5157#issuecomment-1414719714
@alamb I've simplified the benchmark program as follows. (I would like to upload the file as an attachment, but it's impossible in my current environment) It is a single file, named `main.rs`. A lot of it is based on `datafusion/sql/examples/sql.rs` except for the optimizer part. The optimizer part is based on https://crates.io/crates/datafusion-optimizer. ```rust use std::{collections::HashMap, sync::Arc}; use datafusion::{ arrow::datatypes::{DataType, Field, Schema}, common::Result, config::ConfigOptions, error::DataFusionError, logical_expr::{ logical_plan::builder::LogicalTableSource, AggregateUDF, LogicalPlan, ScalarUDF, TableSource, }, optimizer::{optimizer::Optimizer, OptimizerContext, OptimizerRule}, sql::{ planner::{ContextProvider, SqlToRel}, sqlparser::{dialect::GenericDialect, parser::Parser}, TableReference, }, }; #[global_allocator] static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; fn main() { let sql = "select column1 from table1"; let schema_provider = TestSchemaProvider::new(); let now = std::time::Instant::now(); let dialect = GenericDialect {}; let ast = Parser::parse_sql(&dialect, sql).unwrap(); let statement = &ast[0]; let sql_to_rel = SqlToRel::new(&schema_provider); let plan = sql_to_rel.sql_statement_to_plan(statement.clone()).unwrap(); println!( "elapsed time after creating a logical plan: {}", now.elapsed().as_millis() ); let config = OptimizerContext::default(); let optimizer = Optimizer::new(); let optimized_plan = optimizer.optimize(&plan, &config, observe).unwrap(); println!( "elapsed time after optimization: {}\n", now.elapsed().as_millis() ); println!("plan:\n{:?}\n", plan); println!("optimized plan:\n{:?}", optimized_plan); } fn observe(_plan: &LogicalPlan, _rule: &dyn OptimizerRule) {} struct TestSchemaProvider { options: ConfigOptions, tables: HashMap<String, Arc<dyn TableSource>>, } impl TestSchemaProvider { pub fn new() -> Self { let mut tables = HashMap::new(); tables.insert( "table1".to_string(), create_table_source({ 
let mut fields = Vec::new(); for num in 0..700 { fields.push(Field::new( format!("column{}", num + 1), DataType::Int32, false, )) } fields }), ); Self { options: Default::default(), tables, } } } fn create_table_source(fields: Vec<Field>) -> Arc<dyn TableSource> { Arc::new(LogicalTableSource::new(Arc::new( Schema::new_with_metadata(fields, HashMap::new()), ))) } impl ContextProvider for TestSchemaProvider { fn get_table_provider(&self, name: TableReference) -> Result<Arc<dyn TableSource>> { match self.tables.get(name.table()) { Some(table) => Ok(table.clone()), _ => Err(DataFusionError::Plan(format!( "Table not found: {}", name.table() ))), } } fn get_function_meta(&self, _name: &str) -> Option<Arc<ScalarUDF>> { None } fn get_aggregate_meta(&self, _name: &str) -> Option<Arc<AggregateUDF>> { None } fn get_variable_type(&self, _variable_names: &[String]) -> Option<DataType> { None } fn options(&self) -> &ConfigOptions { &self.options } } ``` And the content of Cargo.toml is as follows: ```toml [package] name = "simple_optimizer_test" version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] datafusion = "17.0.0" mimalloc = "0.1.34" [profile.release] debug = 2 ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
