This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 5a06eec2ed Include offending line number when processing CSV file fails (#6653)
5a06eec2ed is described below

commit 5a06eec2ed21992cdd85a1709db0abfad757635e
Author: Piotr Findeisen <[email protected]>
AuthorDate: Thu Oct 31 19:35:35 2024 +0100

    Include offending line number when processing CSV file fails (#6653)
---
 arrow-csv/src/lib.rs        | 21 ++++++++++++++++-----
 arrow-csv/src/reader/mod.rs | 16 ++++++++++++++++
 2 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/arrow-csv/src/lib.rs b/arrow-csv/src/lib.rs
index 28c0d6ebdb..5ce1bc6c33 100644
--- a/arrow-csv/src/lib.rs
+++ b/arrow-csv/src/lib.rs
@@ -32,14 +32,25 @@ use arrow_schema::ArrowError;
 fn map_csv_error(error: csv::Error) -> ArrowError {
     match error.kind() {
         csv::ErrorKind::Io(error) => ArrowError::CsvError(error.to_string()),
-        csv::ErrorKind::Utf8 { pos: _, err } => ArrowError::CsvError(format!(
-            "Encountered UTF-8 error while reading CSV file: {err}"
+        csv::ErrorKind::Utf8 { pos, err } => ArrowError::CsvError(format!(
+            "Encountered UTF-8 error while reading CSV file: {}{}",
+            err,
+            pos.as_ref()
+                .map(|pos| format!(" at line {}", pos.line()))
+                .unwrap_or_default(),
         )),
         csv::ErrorKind::UnequalLengths {
-            expected_len, len, ..
+            pos,
+            expected_len,
+            len,
         } => ArrowError::CsvError(format!(
-            "Encountered unequal lengths between records on CSV file. Expected {len} \
-                 records, found {expected_len} records"
+            "Encountered unequal lengths between records on CSV file. Expected {} \
+                 records, found {} records{}",
+            len,
+            expected_len,
+            pos.as_ref()
+                .map(|pos| format!(" at line {}", pos.line()))
+                .unwrap_or_default(),
         )),
         _ => ArrowError::CsvError("Error reading CSV file".to_string()),
     }
diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs
index d81f1afee8..c91b436f6c 100644
--- a/arrow-csv/src/reader/mod.rs
+++ b/arrow-csv/src/reader/mod.rs
@@ -2591,6 +2591,22 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_record_length_mismatch() {
+        let csv = "\
+        a,b,c\n\
+        1,2,3\n\
+        4,5\n\
+        6,7,8";
+        let mut read = Cursor::new(csv.as_bytes());
+        let result = Format::default()
+            .with_header(true)
+            .infer_schema(&mut read, None);
+        assert!(result.is_err());
+        // Include line number in the error message to help locate and fix the issue
+        assert_eq!(result.err().unwrap().to_string(), "Csv error: Encountered unequal lengths between records on CSV file. Expected 2 records, found 3 records at line 3");
+    }
+
     #[test]
     fn test_comment() {
         let schema = Schema::new(vec![

Reply via email to