alamb commented on code in PR #4547:
URL: https://github.com/apache/arrow-datafusion/pull/4547#discussion_r1045076554
##########
datafusion/core/tests/sqllogictests/src/main.rs:
##########
@@ -172,19 +173,42 @@ async fn context_for_test_file(file_name: &str) -> SessionContext {
}
}
-fn format_batches(batches: Vec<RecordBatch>) -> Result<String> {
+fn convert_batches(batches: Vec<RecordBatch>) -> Result<DBOutput> {
let mut bytes = vec![];
+ if batches.is_empty() {
+ return Ok(DBOutput::StatementComplete(0));
+ }
+ // TODO: use the actual types
+ let types = vec![ColumnType::Any; batches[0].num_columns()];
+
{
- let builder = WriterBuilder::new().has_headers(false).with_delimiter(b' ');
+ let builder = WriterBuilder::new()
+ .has_headers(false)
+ .with_delimiter(b'\t');
let mut writer = builder.build(&mut bytes);
for batch in batches {
writer.write(&normalize_batch(batch)).unwrap();
}
}
- Ok(String::from_utf8(bytes).unwrap())
+ let res = String::from_utf8(bytes).unwrap();
+ let rows = res
+ .lines()
+ .map(|s| {
+ s.split("\t")
+ .map(|s| {
+ if s.is_empty() {
+ "NULL".to_string()
+ } else {
+ s.to_string()
+ }
+ })
+ .collect()
+ })
+ .collect();
Review Comment:
Here is a PR that avoids reparsing CSV:
https://github.com/apache/arrow-datafusion/pull/4578
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]