johanpel commented on a change in pull request #11836:
URL: https://github.com/apache/arrow/pull/11836#discussion_r761097732



##########
File path: cpp/src/arrow/csv/writer_test.cc
##########
@@ -69,25 +70,49 @@ std::vector<WriterTestParams> GenerateTestCases() {
                              { "a": 124, "b\"": "a\"\"b\"" },
                              { "d": 0 },
                              { "e": 86400000 },
-                             { "f": 1078016523 }])";
-  std::string expected_without_header = std::string("1,,-1,,,") + "\n" +       
// line 1
-                                        R"(1,"abc""efg",2324,,,)" + "\n" +     
// line 2
-                                        R"(,"abcd",5467,,,)" + "\n" +          
// line 3
-                                        R"(,,,,,)" + "\n" +                    
// line 4
-                                        R"(546,"",517,,,)" + "\n" +            
// line 5
-                                        R"(124,"a""""b""",,,,)" + "\n" +       
// line 6
-                                        R"(,,,1970-01-01,,)" + "\n" +          
// line 7
-                                        R"(,,,,1970-01-02,)" + "\n" +          
// line 8
-                                        R"(,,,,,2004-02-29 01:02:03)" + "\n";  
// line 9
+                             { "f": 1078016523 },
+                             { "b\"": "NA" }])";
+  std::string expected_without_header = std::string("1,,-1,,,") + "\n" +       
 // line 1
+                                        R"(1,"abc""efg",2324,,,)" + "\n" +     
 // line 2
+                                        R"(,"abcd",5467,,,)" + "\n" +          
 // line 3
+                                        R"(,,,,,)" + "\n" +                    
 // line 4
+                                        R"(546,"",517,,,)" + "\n" +            
 // line 5
+                                        R"(124,"a""""b""",,,,)" + "\n" +       
 // line 6
+                                        R"(,,,1970-01-01,,)" + "\n" +          
 // line 7
+                                        R"(,,,,1970-01-02,)" + "\n" +          
 // line 8
+                                        R"(,,,,,2004-02-29 01:02:03)" + "\n" + 
 // line 9
+                                        R"(,"NA",,,,)" + "\n";                 
 // line 10
+
   std::string expected_header = std::string(R"("a","b""","c ","d","e","f")") + 
"\n";
 
+  auto schema_custom_na = schema({field("g", uint64()), field("h", utf8())});
+
+  auto populated_batch_custom_na = R"([{"g": 42, "h": "NA"},
+                                        {}])";
+
+  std::string expected_custom_na = std::string(R"(42,"NA")") + "\n" +  // line 
1
+                                   R"(NA,NA)" + "\n";                  // line 
2
+
+  std::string expected_custom_quoted_na = std::string(R"(42,"NA")") + "\n" +  
// line 1
+                                          R"(""NA"",""NA"")" + "\n";          
// line 2

Review comment:
       This test case covers a user supplying `"NA"` (including the actual 
quote characters in the value) as the null value string.
   The idea is that all quotes in the null value string are escaped before 
it is rendered for null values, so a null is written as `""NA""`. This way it 
cannot be confused with a valid string value NA, which is rendered as `"NA"`. 
Hope this makes sense?

##########
File path: cpp/src/arrow/csv/writer_test.cc
##########
@@ -69,25 +70,49 @@ std::vector<WriterTestParams> GenerateTestCases() {
                              { "a": 124, "b\"": "a\"\"b\"" },
                              { "d": 0 },
                              { "e": 86400000 },
-                             { "f": 1078016523 }])";
-  std::string expected_without_header = std::string("1,,-1,,,") + "\n" +       
// line 1
-                                        R"(1,"abc""efg",2324,,,)" + "\n" +     
// line 2
-                                        R"(,"abcd",5467,,,)" + "\n" +          
// line 3
-                                        R"(,,,,,)" + "\n" +                    
// line 4
-                                        R"(546,"",517,,,)" + "\n" +            
// line 5
-                                        R"(124,"a""""b""",,,,)" + "\n" +       
// line 6
-                                        R"(,,,1970-01-01,,)" + "\n" +          
// line 7
-                                        R"(,,,,1970-01-02,)" + "\n" +          
// line 8
-                                        R"(,,,,,2004-02-29 01:02:03)" + "\n";  
// line 9
+                             { "f": 1078016523 },
+                             { "b\"": "NA" }])";
+  std::string expected_without_header = std::string("1,,-1,,,") + "\n" +       
 // line 1
+                                        R"(1,"abc""efg",2324,,,)" + "\n" +     
 // line 2
+                                        R"(,"abcd",5467,,,)" + "\n" +          
 // line 3
+                                        R"(,,,,,)" + "\n" +                    
 // line 4
+                                        R"(546,"",517,,,)" + "\n" +            
 // line 5
+                                        R"(124,"a""""b""",,,,)" + "\n" +       
 // line 6
+                                        R"(,,,1970-01-01,,)" + "\n" +          
 // line 7
+                                        R"(,,,,1970-01-02,)" + "\n" +          
 // line 8
+                                        R"(,,,,,2004-02-29 01:02:03)" + "\n" + 
 // line 9
+                                        R"(,"NA",,,,)" + "\n";                 
 // line 10
+
   std::string expected_header = std::string(R"("a","b""","c ","d","e","f")") + 
"\n";
 
+  auto schema_custom_na = schema({field("g", uint64()), field("h", utf8())});
+
+  auto populated_batch_custom_na = R"([{"g": 42, "h": "NA"},
+                                        {}])";
+
+  std::string expected_custom_na = std::string(R"(42,"NA")") + "\n" +  // line 
1
+                                   R"(NA,NA)" + "\n";                  // line 
2
+
+  std::string expected_custom_quoted_na = std::string(R"(42,"NA")") + "\n" +  
// line 1
+                                          R"(""NA"",""NA"")" + "\n";          
// line 2

Review comment:
       This test case covers a user supplying `"NA"` (including the actual 
quote characters) as the null value string.
   The idea is that all quotes in the null value string are escaped before 
it is rendered for null values, so a null is written as `""NA""`. This way it 
cannot be confused with a valid string value NA, which is rendered as `"NA"`. 
Hope this makes sense?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to