johanpel commented on a change in pull request #11849:
URL: https://github.com/apache/arrow/pull/11849#discussion_r763270044



##########
File path: cpp/src/arrow/csv/writer.cc
##########
@@ -155,27 +161,61 @@ class UnquotedColumnPopulator : public ColumnPopulator {
     return Status::OK();
   }
 
-  void PopulateColumns(char* output, int32_t* offsets) const override {
-    VisitArrayDataInline<StringType>(
-        *casted_array_->data(),
-        [&](arrow::util::string_view s) {
-          int32_t next_column_offset = static_cast<int32_t>(s.length()) + 
/*end_char*/ 1;
-          memcpy((output + *offsets - next_column_offset), s.data(), 
s.length());
-          *(output + *offsets - 1) = end_char_;
-          *offsets -= next_column_offset;
-          offsets++;
-        },
-        [&]() {
-          // For nulls, the configured null value string is copied into the 
output.
-          int32_t next_column_offset =
-              static_cast<int32_t>(null_string_->size()) + /*end_char*/ 1;
-          memcpy((output + *offsets - next_column_offset), 
null_string_->data(),
-                 null_string_->size());
-          *(output + *offsets - 1) = end_char_;
-          *offsets -= next_column_offset;
-          offsets++;
-        });
+  Status PopulateColumns(char* output, int32_t* offsets) const override {
+    // Function applied to valid values cast to string.
+    auto valid_function = [&](arrow::util::string_view s) {
+      int32_t next_column_offset = static_cast<int32_t>(s.length()) + 
/*end_char*/ 1;
+      memcpy((output + *offsets - next_column_offset), s.data(), s.length());
+      *(output + *offsets - 1) = end_char_;
+      *offsets -= next_column_offset;
+      offsets++;
+      return Status::OK();
+    };
+
+    // Function applied to null values cast to string.
+    auto null_function = [&]() {
+      // For nulls, the configured null value string is copied into the output.
+      int32_t next_column_offset =
+          static_cast<int32_t>(null_string_->size()) + /*end_char*/ 1;
+      memcpy((output + *offsets - next_column_offset), null_string_->data(),
+             null_string_->size());
+      *(output + *offsets - 1) = end_char_;
+      *offsets -= next_column_offset;
+      offsets++;
+      return Status::OK();
+    };
+
+    if (reject_values_with_quotes_) {
+      // When using this UnquotedColumnPopulator on values that, after 
casting, could
+      // produce quotes, we need to return an error in accord with RFC4180. We 
need to
+      // precede valid_func with a check.
+      return VisitArrayDataInline<StringType>(
+          *casted_array_->data(),
+          [&](arrow::util::string_view s) {
+            ARROW_RETURN_NOT_OK(CheckStringHasNoQuotes(s));
+            return valid_function(s);
+          },
+          null_function);
+    } else {
+      // Populate without checking and rejecting values with quotes.
+      return VisitArrayDataInline<StringType>(*casted_array_->data(), 
valid_function,
+                                              null_function);
+    }
+  }
+
+ private:
+  static Status CheckStringHasNoQuotes(const util::string_view& s) {
+    if (CountQuotes(s) > 0) {

Review comment:
       Good call, I added some code to reject these as well.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to