alamb commented on code in PR #9548:
URL: https://github.com/apache/arrow-datafusion/pull/9548#discussion_r1520436449
##########
datafusion/core/src/datasource/file_format/parquet.rs:
##########
@@ -717,7 +734,18 @@ impl DataSink for ParquetSink {
while let Some(result) = file_write_tasks.join_next().await {
match result {
Ok(r) => {
- row_count += r?;
+ let (path, file_metadata) = r?;
+ row_count += file_metadata.num_rows;
+ let mut written_files = self.written.lock();
+ written_files
+ .try_insert(path.clone(), file_metadata)
+ .map_err(|e| {
+ DataFusionError::Internal(format!(
+ "duplicate entry detected for partitioned file {}: {}",
+ &path, e
+ ))
Review Comment:
Can you please use the `internal_err!` macro here instead? Something like:
```suggestion
.map_err(|e| internal_err!("duplicate entry detected for partitioned file {path}: {e}"))
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]