westonpace commented on a change in pull request #11857:
URL: https://github.com/apache/arrow/pull/11857#discussion_r763293547



##########
File path: cpp/src/arrow/csv/writer.cc
##########
@@ -159,19 +160,22 @@ class UnquotedColumnPopulator : public ColumnPopulator {
     VisitArrayDataInline<StringType>(
         *casted_array_->data(),
         [&](arrow::util::string_view s) {
-          int32_t next_column_offset = static_cast<int32_t>(s.length()) + 
/*end_char*/ 1;
+          int32_t next_column_offset = static_cast<int32_t>(
+              s.length() + /*end_chars(, or eol)*/ end_chars_.size());

Review comment:
       This inline comment is probably not needed anymore.  Before it was 
explaining the significance of the `1` but since it is `end_chars_.size()` now 
it should be pretty obvious.

##########
File path: cpp/src/arrow/csv/writer.cc
##########
@@ -184,9 +188,9 @@ class UnquotedColumnPopulator : public ColumnPopulator {
 // a quote character (") and escaping is done my adding another quote.
 class QuotedColumnPopulator : public ColumnPopulator {
  public:
-  QuotedColumnPopulator(MemoryPool* pool, char end_char,
+  QuotedColumnPopulator(MemoryPool* pool, std::string& end_chars,
                         std::shared_ptr<Buffer> null_string)
-      : ColumnPopulator(pool, end_char, std::move(null_string)) {}
+      : ColumnPopulator(pool, end_chars, std::move(null_string)) {}

Review comment:
       Same as above.  Const reference or move-construct

##########
File path: cpp/src/arrow/csv/writer.cc
##########
@@ -159,19 +160,22 @@ class UnquotedColumnPopulator : public ColumnPopulator {
     VisitArrayDataInline<StringType>(
         *casted_array_->data(),
         [&](arrow::util::string_view s) {
-          int32_t next_column_offset = static_cast<int32_t>(s.length()) + 
/*end_char*/ 1;
+          int32_t next_column_offset = static_cast<int32_t>(
+              s.length() + /*end_chars(, or eol)*/ end_chars_.size());
           memcpy((output + *offsets - next_column_offset), s.data(), 
s.length());
-          *(output + *offsets - 1) = end_char_;
-          *offsets -= next_column_offset;
+          memcpy((output + *offsets - end_chars_.size()), end_chars_.c_str(),
+                 end_chars_.size());
+          *offsets -= static_cast<int32_t>(next_column_offset);
           offsets++;
         },
         [&]() {
           // For nulls, the configured null value string is copied into the 
output.
-          int32_t next_column_offset =
-              static_cast<int32_t>(null_string_->size()) + /*end_char*/ 1;
+          int32_t next_column_offset = static_cast<int32_t>(
+              null_string_->size() + /*end_chars(, or eol)*/ 
end_chars_.size());

Review comment:
       Again, can drop the inline comment.

##########
File path: cpp/src/arrow/csv/writer.cc
##########
@@ -282,20 +287,20 @@ struct PopulatorFactory {
                   is_null_type<TypeClass>::value || 
is_temporal_type<TypeClass>::value,
               Status>
   Visit(const TypeClass& type) {
-    populator = new UnquotedColumnPopulator(pool, end_char, null_string);
+    populator = new UnquotedColumnPopulator(pool, end_chars, null_string);
     return Status::OK();
   }
 
-  char end_char;
+  std::string end_chars;
   std::shared_ptr<Buffer> null_string;
   MemoryPool* pool;
   ColumnPopulator* populator;
 };
 
 Result<std::unique_ptr<ColumnPopulator>> MakePopulator(
-    const Field& field, char end_char, std::shared_ptr<Buffer> null_string,
+    const Field& field, std::string& end_chars, std::shared_ptr<Buffer> 
null_string,
     MemoryPool* pool) {
-  PopulatorFactory factory{end_char, std::move(null_string), pool, nullptr};
+  PopulatorFactory factory{end_chars, std::move(null_string), pool, nullptr};

Review comment:
       Same as above, const reference or move copy

##########
File path: cpp/src/arrow/csv/writer.cc
##########
@@ -318,9 +323,9 @@ class CSVWriterImpl : public ipc::RecordBatchWriter {
 
     std::vector<std::unique_ptr<ColumnPopulator>> 
populators(schema->num_fields());
     for (int col = 0; col < schema->num_fields(); col++) {
-      char end_char = col < schema->num_fields() - 1 ? ',' : '\n';
+      std::string end_chars = col < schema->num_fields() - 1 ? "," : 
options.eol;

Review comment:
       ```suggestion
         const std::string& end_chars = col < schema->num_fields() - 1 ? "," : 
options.eol;
   ```

##########
File path: cpp/src/arrow/csv/writer.cc
##########
@@ -159,19 +160,22 @@ class UnquotedColumnPopulator : public ColumnPopulator {
     VisitArrayDataInline<StringType>(
         *casted_array_->data(),
         [&](arrow::util::string_view s) {
-          int32_t next_column_offset = static_cast<int32_t>(s.length()) + 
/*end_char*/ 1;
+          int32_t next_column_offset = static_cast<int32_t>(
+              s.length() + /*end_chars(, or eol)*/ end_chars_.size());
           memcpy((output + *offsets - next_column_offset), s.data(), 
s.length());
-          *(output + *offsets - 1) = end_char_;
-          *offsets -= next_column_offset;
+          memcpy((output + *offsets - end_chars_.size()), end_chars_.c_str(),
+                 end_chars_.size());
+          *offsets -= static_cast<int32_t>(next_column_offset);

Review comment:
       Why is this cast needed?  `next_column_offset` should be `int32_t`

##########
File path: cpp/src/arrow/csv/writer.cc
##########
@@ -142,9 +143,9 @@ char* EscapeReverse(arrow::util::string_view s, char* 
out_end) {
 // from a cast does not require quoting or escaping.
 class UnquotedColumnPopulator : public ColumnPopulator {
  public:
-  explicit UnquotedColumnPopulator(MemoryPool* memory_pool, char end_char,
+  explicit UnquotedColumnPopulator(MemoryPool* memory_pool, std::string& 
end_chars,
                                    std::shared_ptr<Buffer> null_string_)
-      : ColumnPopulator(memory_pool, end_char, std::move(null_string_)) {}
+      : ColumnPopulator(memory_pool, end_chars, std::move(null_string_)) {}

Review comment:
       Either change `end_chars` to be a `const std::string&` or take away the 
`&` here and use `std::move`.

##########
File path: cpp/src/arrow/csv/writer.cc
##########
@@ -223,25 +227,26 @@ class QuotedColumnPopulator : public ColumnPopulator {
             memcpy(row_end - next_column_offset + /*quote_offset=*/1, s.data(),
                    s.length());
           } else {
-            // Adjust row_end by 3: 1 quote char, 1 end char and 1 to position 
at the
-            // first position to write to.
-            next_column_offset =
-                static_cast<int32_t>(row_end - EscapeReverse(s, row_end - 3));
+            // Adjust row_end by 2 + end_chars_.size(): 1 quote char, 
end_chars_.size()
+            // and 1 to position at the first position to write to.
+            next_column_offset = static_cast<int32_t>(
+                row_end - EscapeReverse(s, row_end - 2 - end_chars_.size()));
           }
           *(row_end - next_column_offset) = '"';
-          *(row_end - 2) = '"';
-          *(row_end - 1) = end_char_;
+          *(row_end - end_chars_.size() - 1) = '"';
+          memcpy(row_end - end_chars_.size(), end_chars_.data(), 
end_chars_.length());
           *offsets -= next_column_offset;
           offsets++;
           needs_escaping++;
         },
         [&]() {
           // For nulls, the configured null value string is copied into the 
output.
-          int32_t next_column_offset =
-              static_cast<int32_t>(null_string_->size()) + /*end_char*/ 1;
+          int32_t next_column_offset = static_cast<int32_t>(
+              null_string_->size() + /*end_chars(, or eol)*/ 
end_chars_.size());

Review comment:
       ```suggestion
                 null_string_->size() + end_chars_.size());
   ```

##########
File path: cpp/src/arrow/csv/writer.cc
##########
@@ -87,8 +87,9 @@ constexpr int64_t kQuoteDelimiterCount = kQuoteCount + 
/*end_char*/ 1;
 // populators (it populates data backwards).
 class ColumnPopulator {
  public:
-  ColumnPopulator(MemoryPool* pool, char end_char, std::shared_ptr<Buffer> 
null_string)
-      : end_char_(end_char), null_string_(std::move(null_string)), pool_(pool) 
{}
+  ColumnPopulator(MemoryPool* pool, std::string end_chars,
+                  std::shared_ptr<Buffer> null_string)
+      : end_chars_(end_chars), null_string_(std::move(null_string)), 
pool_(pool) {}

Review comment:
       ```suggestion
         : end_chars_(std::move(end_chars)), 
null_string_(std::move(null_string)), pool_(pool) {}
   ```
   Nit: I'd guess most of the time this is going to be a no-op as the string is 
interned but just from a consistency standpoint it's probably good to have.
   
   However, the easiest thing to do might be to make `end_chars_` a `const 
std::string&`

##########
File path: cpp/src/arrow/csv/writer.cc
##########
@@ -423,10 +431,13 @@ class CSVWriterImpl : public ipc::RecordBatchWriter {
       RETURN_NOT_OK(
           column_populators_[col]->UpdateRowLengths(*batch.column(col), 
offsets_.data()));
     }
-    // Calculate cumulalative offsets for each row (including delimiters).
-    offsets_[0] += batch.num_columns();
+    // Calculate cumulative offsets for each row (including delimiters).
+    // ',' * num_columns - 1(last column doesn't have ,) + eol
+    int32_t delimiters_length =
+        static_cast<int32_t>(batch.num_columns() - 1 + options_.eol.size());
+    offsets_[0] += delimiters_length;
     for (int64_t row = 1; row < batch.num_rows(); row++) {
-      offsets_[row] += offsets_[row - 1] + /*delimiter lengths*/ 
batch.num_columns();
+      offsets_[row] += offsets_[row - 1] + /*delimiter lengths*/ 
delimiters_length;

Review comment:
       ```suggestion
         offsets_[row] += offsets_[row - 1] + delimiters_length;
   ```

##########
File path: cpp/src/arrow/csv/writer.cc
##########
@@ -318,9 +323,9 @@ class CSVWriterImpl : public ipc::RecordBatchWriter {
 
     std::vector<std::unique_ptr<ColumnPopulator>> 
populators(schema->num_fields());
     for (int col = 0; col < schema->num_fields(); col++) {
-      char end_char = col < schema->num_fields() - 1 ? ',' : '\n';
+      std::string end_chars = col < schema->num_fields() - 1 ? "," : 
options.eol;

Review comment:
       If you go the route of making the string a const reference in the 
populators you might need to move this `","` to a file-scoped static constant.

##########
File path: cpp/src/arrow/csv/writer_test.cc
##########
@@ -46,14 +46,29 @@ void PrintTo(const WriterTestParams& p, std::ostream* os) {
   *os << "WriterTestParams(" << reinterpret_cast<const void*>(&p) << ")";
 }
 
-WriteOptions DefaultTestOptions(bool include_header, const std::string& 
null_string) {
+WriteOptions DefaultTestOptions(bool include_header, const std::string& 
null_string,
+                                const std::string eol = "\n") {
   WriteOptions options;
   options.batch_size = 5;
   options.include_header = include_header;
   options.null_string = null_string;
+  options.eol = eol;
   return options;
 }
 
+std::string UtilGetExpectedWithEOL(std::string eol) {

Review comment:
       ```suggestion
   std::string UtilGetExpectedWithEOL(const std::string& eol) {
   ```

##########
File path: cpp/src/arrow/csv/writer_test.cc
##########
@@ -46,14 +46,29 @@ void PrintTo(const WriterTestParams& p, std::ostream* os) {
   *os << "WriterTestParams(" << reinterpret_cast<const void*>(&p) << ")";
 }
 
-WriteOptions DefaultTestOptions(bool include_header, const std::string& 
null_string) {
+WriteOptions DefaultTestOptions(bool include_header, const std::string& 
null_string,
+                                const std::string eol = "\n") {

Review comment:
       ```suggestion
                                   const std::string& eol = "\n") {
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to