alamb commented on code in PR #4818:
URL: https://github.com/apache/arrow-datafusion/pull/4818#discussion_r1070567335
##########
datafusion/core/src/physical_plan/file_format/csv.rs:
##########
@@ -444,24 +444,11 @@ mod tests {
assert_eq!(14, csv.schema().fields().len());
let mut it = csv.execute(0, task_ctx)?;
- let batch = it.next().await.unwrap()?;
- assert_eq!(14, batch.num_columns());
- assert_eq!(5, batch.num_rows());
-
- let expected = vec![
-
"+----+----+-----+--------+------------+----------------------+-----+-------+------------+----------------------+-------------+---------------------+--------------------------------+-------------+",
- "| c1 | c2 | c3 | c4 | c5 | c6 | c7
| c8 | c9 | c10 | c11 | c12
| c13 | missing_col |",
-
"+----+----+-----+--------+------------+----------------------+-----+-------+------------+----------------------+-------------+---------------------+--------------------------------+-------------+",
- "| c | 2 | 1 | 18109 | 2033001162 | -6513304855495910254 | 25
| 43062 | 1491205016 | 5863949479783605708 | 0.110830784 | 0.9294097332465232
| 6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW | |",
- "| d | 5 | -40 | 22614 | 706441268 | -7542719935673075327 |
155 | 14337 | 3373581039 | 11720144131976083864 | 0.69632107 |
0.3114712539863804 | C2GT5KVyOPZpgKVl110TyZO0NcJ434 | |",
- "| b | 1 | 29 | -18218 | 994303988 | 5983957848665088916 |
204 | 9489 | 3275293996 | 14857091259186476033 | 0.53840446 |
0.17909035118828576 | AyYVExXK6AR2qUTxNZ7qRHQOVGMLcz | |",
- "| a | 1 | -85 | -15154 | 1171968280 | 1919439543497968449 | 77
| 52286 | 774637006 | 12101411955859039553 | 0.12285209 | 0.6864391962767343
| 0keZ5G8BffGwgF2RwQD59TFzMStxCB | |",
- "| b | 5 | -82 | 22080 | 1824882165 | 7373730676428214987 |
208 | 34331 | 3342719438 | 3330177516592499461 | 0.82634634 |
0.40975383525297016 | Ig1QcuKsjHXkproePdERo2w0mYzIqd | |",
-
"+----+----+-----+--------+------------+----------------------+-----+-------+------------+----------------------+-------------+---------------------+--------------------------------+-------------+",
- ];
-
- crate::assert_batches_eq!(expected, &[batch]);
-
+ let err = it.next().await.unwrap().unwrap_err().to_string();
Review Comment:
I looked at this test more carefully -- I think it is supposed to
demonstrate that we can read from a CSV file where the schema in the file is
a subset of the schema in the plan, and the columns missing from the file are
supposed to get padded with nulls.
It appears to have been added by @thinkharderdev in
https://github.com/apache/arrow-datafusion/commit/7bec762d1f1ebef4801af2eefd7a5033c474fe77
last year.
I think we should update the code so that it continues to pass.
I can try to look at it later this week if no one else has a chance.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]