2010YOUY01 commented on code in PR #16182: URL: https://github.com/apache/datafusion/pull/16182#discussion_r2117240352
########## benchmarks/src/util/run.rs: ########## @@ -138,6 +144,13 @@ impl BenchmarkRun { } } + /// Mark current query + pub fn mark_failed(&mut self) { + if let Some(idx) = self.current_case { + self.queries[idx].success = false; + } Review Comment: I suggest using `unreachable!()` or returning an error in the else branch. ########## benchmarks/src/clickbench.rs: ########## @@ -128,36 +128,70 @@ impl RunOpt { let ctx = SessionContext::new_with_config_rt(config, rt_builder.build_arc()?); self.register_hits(&ctx).await?; - let iterations = self.common.iterations; let mut benchmark_run = BenchmarkRun::new(); + let mut failed_queries: Vec<usize> = + Vec::with_capacity(query_range.clone().count()); for query_id in query_range { - let mut millis = Vec::with_capacity(iterations); benchmark_run.start_new_case(&format!("Query {query_id}")); - let sql = queries.get_query(query_id)?; - println!("Q{query_id}: {sql}"); - - for i in 0..iterations { - let start = Instant::now(); - let results = ctx.sql(sql).await?.collect().await?; - let elapsed = start.elapsed(); - let ms = elapsed.as_secs_f64() * 1000.0; - millis.push(ms); - let row_count: usize = results.iter().map(|b| b.num_rows()).sum(); - println!( - "Query {query_id} iteration {i} took {ms:.1} ms and returned {row_count} rows" - ); - benchmark_run.write_iter(elapsed, row_count); + let query_run = self.benchmark_query(&queries, query_id, &ctx).await; + match query_run { + Ok(query_results) => { + for iter in query_results { + benchmark_run.write_iter(iter.elapsed, iter.row_count); + } + } + Err(e) => { + benchmark_run.mark_failed(); + failed_queries.push(query_id); + eprintln!("Query {query_id} failed: {e}"); + } } - if self.common.debug { - ctx.sql(sql).await?.explain(false, false)?.show().await?; - } - let avg = millis.iter().sum::<f64>() / millis.len() as f64; - println!("Query {query_id} avg time: {avg:.2} ms"); } benchmark_run.maybe_write_json(self.output_path.as_ref())?; + if !failed_queries.is_empty() { Review Comment: nit: I
think it would be better to implement this within `BenchmarkRun`, so that we don't have to track failed queries in the loop: ``` benchmark_run.maybe_print_failures()?; ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org