This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 5a06eec2ed Include offending line number when processing CSV file fails (#6653)
5a06eec2ed is described below
commit 5a06eec2ed21992cdd85a1709db0abfad757635e
Author: Piotr Findeisen <[email protected]>
AuthorDate: Thu Oct 31 19:35:35 2024 +0100
Include offending line number when processing CSV file fails (#6653)
---
arrow-csv/src/lib.rs | 21 ++++++++++++++++-----
arrow-csv/src/reader/mod.rs | 16 ++++++++++++++++
2 files changed, 32 insertions(+), 5 deletions(-)
diff --git a/arrow-csv/src/lib.rs b/arrow-csv/src/lib.rs
index 28c0d6ebdb..5ce1bc6c33 100644
--- a/arrow-csv/src/lib.rs
+++ b/arrow-csv/src/lib.rs
@@ -32,14 +32,25 @@ use arrow_schema::ArrowError;
fn map_csv_error(error: csv::Error) -> ArrowError {
match error.kind() {
csv::ErrorKind::Io(error) => ArrowError::CsvError(error.to_string()),
- csv::ErrorKind::Utf8 { pos: _, err } => ArrowError::CsvError(format!(
- "Encountered UTF-8 error while reading CSV file: {err}"
+ csv::ErrorKind::Utf8 { pos, err } => ArrowError::CsvError(format!(
+ "Encountered UTF-8 error while reading CSV file: {}{}",
+ err,
+ pos.as_ref()
+ .map(|pos| format!(" at line {}", pos.line()))
+ .unwrap_or_default(),
)),
csv::ErrorKind::UnequalLengths {
- expected_len, len, ..
+ pos,
+ expected_len,
+ len,
} => ArrowError::CsvError(format!(
- "Encountered unequal lengths between records on CSV file. Expected {len} \
- records, found {expected_len} records"
+ "Encountered unequal lengths between records on CSV file. Expected {} \
+ records, found {} records{}",
+ len,
+ expected_len,
+ pos.as_ref()
+ .map(|pos| format!(" at line {}", pos.line()))
+ .unwrap_or_default(),
)),
_ => ArrowError::CsvError("Error reading CSV file".to_string()),
}
diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs
index d81f1afee8..c91b436f6c 100644
--- a/arrow-csv/src/reader/mod.rs
+++ b/arrow-csv/src/reader/mod.rs
@@ -2591,6 +2591,22 @@ mod tests {
}
}
+ #[test]
+ fn test_record_length_mismatch() {
+ let csv = "\
+ a,b,c\n\
+ 1,2,3\n\
+ 4,5\n\
+ 6,7,8";
+ let mut read = Cursor::new(csv.as_bytes());
+ let result = Format::default()
+ .with_header(true)
+ .infer_schema(&mut read, None);
+ assert!(result.is_err());
+ // Include line number in the error message to help locate and fix the issue
+ assert_eq!(result.err().unwrap().to_string(), "Csv error: Encountered unequal lengths between records on CSV file. Expected 2 records, found 3 records at line 3");
+ }
+
#[test]
fn test_comment() {
let schema = Schema::new(vec![