johanpel commented on a change in pull request #11836:
URL: https://github.com/apache/arrow/pull/11836#discussion_r761105171



##########
File path: cpp/src/arrow/csv/writer_test.cc
##########
@@ -69,25 +70,49 @@ std::vector<WriterTestParams> GenerateTestCases() {
                              { "a": 124, "b\"": "a\"\"b\"" },
                              { "d": 0 },
                              { "e": 86400000 },
-                             { "f": 1078016523 }])";
-  std::string expected_without_header = std::string("1,,-1,,,") + "\n" +       
// line 1
-                                        R"(1,"abc""efg",2324,,,)" + "\n" +     
// line 2
-                                        R"(,"abcd",5467,,,)" + "\n" +          
// line 3
-                                        R"(,,,,,)" + "\n" +                    
// line 4
-                                        R"(546,"",517,,,)" + "\n" +            
// line 5
-                                        R"(124,"a""""b""",,,,)" + "\n" +       
// line 6
-                                        R"(,,,1970-01-01,,)" + "\n" +          
// line 7
-                                        R"(,,,,1970-01-02,)" + "\n" +          
// line 8
-                                        R"(,,,,,2004-02-29 01:02:03)" + "\n";  
// line 9
+                             { "f": 1078016523 },
+                             { "b\"": "NA" }])";
+  std::string expected_without_header = std::string("1,,-1,,,") + "\n" +       
 // line 1
+                                        R"(1,"abc""efg",2324,,,)" + "\n" +     
 // line 2
+                                        R"(,"abcd",5467,,,)" + "\n" +          
 // line 3
+                                        R"(,,,,,)" + "\n" +                    
 // line 4
+                                        R"(546,"",517,,,)" + "\n" +            
 // line 5
+                                        R"(124,"a""""b""",,,,)" + "\n" +       
 // line 6
+                                        R"(,,,1970-01-01,,)" + "\n" +          
 // line 7
+                                        R"(,,,,1970-01-02,)" + "\n" +          
 // line 8
+                                        R"(,,,,,2004-02-29 01:02:03)" + "\n" + 
 // line 9
+                                        R"(,"NA",,,,)" + "\n";                 
 // line 10
+
   std::string expected_header = std::string(R"("a","b""","c ","d","e","f")") + 
"\n";
 
+  auto schema_custom_na = schema({field("g", uint64()), field("h", utf8())});
+
+  auto populated_batch_custom_na = R"([{"g": 42, "h": "NA"},
+                                        {}])";
+
+  std::string expected_custom_na = std::string(R"(42,"NA")") + "\n" +  // line 
1
+                                   R"(NA,NA)" + "\n";                  // line 
2
+
+  std::string expected_custom_quoted_na = std::string(R"(42,"NA")") + "\n" +  
// line 1
+                                          R"(""NA"",""NA"")" + "\n";          
// line 2
+
   return std::vector<WriterTestParams>{
-      {abc_schema, "[]", DefaultTestOptions(/*header=*/false), ""},
-      {abc_schema, "[]", DefaultTestOptions(/*header=*/true), expected_header},
-      {abc_schema, populated_batch, DefaultTestOptions(/*header=*/false),
+      {abc_schema, "[]", DefaultTestOptions(/*include_header=*/false, 
/*null_string=*/""),
+       ""},
+      {abc_schema, "[]", DefaultTestOptions(/*include_header=*/true, 
/*null_string=*/""),
+       expected_header},
+      {abc_schema, populated_batch,
+       DefaultTestOptions(/*include_header=*/false, /*null_string=*/""),
        expected_without_header},
-      {abc_schema, populated_batch, DefaultTestOptions(/*header=*/true),
-       expected_header + expected_without_header}};
+      {abc_schema, populated_batch,
+       DefaultTestOptions(/*include_header=*/true, /*null_string=*/""),
+       expected_header + expected_without_header},
+      {schema_custom_na, populated_batch_custom_na,
+       DefaultTestOptions(/*include_header=*/false, /*null_string=*/"NA"),
+       expected_custom_na},
+      {schema_custom_na, populated_batch_custom_na,
+       DefaultTestOptions(/*include_header=*/false, /*null_string=*/R"("NA")"),
+       expected_custom_quoted_na}};

Review comment:
       If for some reason the user uses `"NA"` (including quotes, so four 
characters) as the null value, whatever reads it would need to understand that 
after removing the escapes this does not represent the string 'NA' (two chars), 
but would need to parse `"NA"` as a null.
   
   If we actually want to prevent users from making such a mistake we should 
change the CSV write option struct to have an API for the null value string 
that checks it for quotes and rejects it if it does.
   
   The point is if you specify something weird as a null value for the writer, 
but not for the reader, the latter will not recognize it as a null.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to