tustvold commented on code in PR #4522:
URL: https://github.com/apache/arrow-datafusion/pull/4522#discussion_r1040141933
##########
datafusion/core/tests/memory_limit.rs:
##########
@@ -66,23 +68,21 @@ async fn group_by_hash() {
run_limit_test(
// group by dict column
"select count(*) from t GROUP BY service, host, pod, container",
- "Resources exhausted: Cannot spill GroupBy Hash Accumulators",
+ "Resources exhausted: Failed to allocate additional",
+ 1_000,
)
.await
}
/// 50 byte memory limit
-const MEMORY_LIMIT_BYTES: usize = 50;
const MEMORY_FRACTION: f64 = 0.95;
/// runs the specified query against 1000 rows with a 50
/// byte memory limit and no disk manager enabled.
-async fn run_limit_test(query: &str, expected_error: &str) {
- let generator = AccessLogGenerator::new().with_row_limit(Some(1000));
-
- let batches: Vec<RecordBatch> = generator
- // split up into more than one batch, as the size limit in sort is not
enforced until the second batch
- .flat_map(stagger_batch)
Review Comment:
This change is necessary because a sliced batch reports the full size of its
underlying buffers rather than just the slice, so the memory accounting over-reports usage
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]