mapleFU commented on code in PR #40271:
URL: https://github.com/apache/arrow/pull/40271#discussion_r1557718484


##########
cpp/src/arrow/csv/writer.cc:
##########
@@ -146,7 +165,7 @@ class ColumnPopulator {
 
  protected:
   virtual Status UpdateRowLengths(int64_t* row_lengths) = 0;
-  std::shared_ptr<StringArray> casted_array_;
+  std::shared_ptr<Array> array_;

Review Comment:
   Can you just a comment for `array_` that it would be a `StringArray` or 
`LargeStringArray`?



##########
cpp/src/arrow/csv/writer.cc:
##########
@@ -128,10 +139,18 @@ class ColumnPopulator {
     // Populators are intented to be applied to reasonably small data.  In 
most cases
     // threading overhead would not be justified.
     ctx.set_use_threads(false);
-    ASSIGN_OR_RAISE(
-        std::shared_ptr<Array> casted,
-        compute::Cast(data, /*to_type=*/utf8(), compute::CastOptions(), &ctx));
-    casted_array_ = checked_pointer_cast<StringArray>(casted);
+    if (data.type() && is_large_binary_like(data.type()->id())) {
+      ASSIGN_OR_RAISE(array_, compute::Cast(data, /*to_type=*/large_utf8(),
+                                            compute::CastOptions(), &ctx));
+    } else {
+      auto casted = compute::Cast(data, /*to_type=*/utf8(), 
compute::CastOptions(), &ctx);

Review Comment:
   wonder which scenerio would this happens? When a extremly huge array is 
passed in `data`? Should an error code being checked here or just not casting 
it?



##########
cpp/src/arrow/csv/writer.cc:
##########
@@ -108,6 +108,17 @@ int64_t CountQuotes(std::string_view s) {
 constexpr int64_t kQuoteCount = 2;
 constexpr int64_t kQuoteDelimiterCount = kQuoteCount + /*end_char*/ 1;
 
+#ifndef CSV_WRITER_DISPATCH_STRING_ARRAY_TYPE
+#define CSV_WRITER_DISPATCH_STRING_ARRAY_TYPE(array, func, ...) \
+  do {                                                          \
+    if (ARROW_PREDICT_TRUE(array->type_id() == Type::STRING)) { \
+      return func<StringArray>(__VA_ARGS__);                    \
+    } else {                                                    \
+      return func<LargeStringArray>(__VA_ARGS__);               \

Review Comment:
   Also dcheck the type is large_string here?



##########
cpp/src/arrow/csv/writer.cc:
##########
@@ -108,6 +108,17 @@ int64_t CountQuotes(std::string_view s) {
 constexpr int64_t kQuoteCount = 2;
 constexpr int64_t kQuoteDelimiterCount = kQuoteCount + /*end_char*/ 1;
 
+#ifndef CSV_WRITER_DISPATCH_STRING_ARRAY_TYPE
+#define CSV_WRITER_DISPATCH_STRING_ARRAY_TYPE(array, func, ...) \

Review Comment:
   I also thinks a template helper function or just write the code is a better 
way



##########
cpp/src/arrow/csv/writer.cc:
##########
@@ -128,10 +139,18 @@ class ColumnPopulator {
     // Populators are intented to be applied to reasonably small data.  In 
most cases
     // threading overhead would not be justified.
     ctx.set_use_threads(false);
-    ASSIGN_OR_RAISE(
-        std::shared_ptr<Array> casted,
-        compute::Cast(data, /*to_type=*/utf8(), compute::CastOptions(), &ctx));
-    casted_array_ = checked_pointer_cast<StringArray>(casted);
+    if (data.type() && is_large_binary_like(data.type()->id())) {
+      ASSIGN_OR_RAISE(array_, compute::Cast(data, /*to_type=*/large_utf8(),
+                                            compute::CastOptions(), &ctx));
+    } else {
+      auto casted = compute::Cast(data, /*to_type=*/utf8(), 
compute::CastOptions(), &ctx);
+      if (casted.ok()) {
+        array_ = casted.ValueOrDie();

Review Comment:
   ```suggestion
           array_ = std::move(casted).ValueOrDie();
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to