romainfrancois commented on a change in pull request #7819:
URL: https://github.com/apache/arrow/pull/7819#discussion_r471335186



##########
File path: r/src/array_from_vector.cpp
##########
@@ -1064,42 +1063,42 @@ class FixedSizeBinaryVectorConverter : public VectorConverter {
   FixedSizeBinaryBuilder* typed_builder_;
 };
 
-template <typename Builder>
+template <typename StringBuilder>
 class StringVectorConverter : public VectorConverter {
  public:
   ~StringVectorConverter() {}
 
   Status Init(ArrayBuilder* builder) {
-    typed_builder_ = checked_cast<Builder*>(builder);
+    typed_builder_ = checked_cast<StringBuilder*>(builder);
     return Status::OK();
   }
 
   Status Ingest(SEXP obj) {
     ARROW_RETURN_IF(TYPEOF(obj) != STRSXP,
                     Status::RError("Expecting a character vector"));
-    R_xlen_t n = XLENGTH(obj);
 
-    // Reserve enough space before appending
-    int64_t size = 0;
-    for (R_xlen_t i = 0; i < n; i++) {
-      SEXP string_i = STRING_ELT(obj, i);
-      if (string_i != NA_STRING) {
-        size += XLENGTH(Rf_mkCharCE(Rf_translateCharUTF8(string_i), CE_UTF8));
-      }
+    cpp11::strings s(obj);
+    RETURN_NOT_OK(typed_builder_->Reserve(s.size()));
+
+    // note: the total length is calculated without utf8
+    //       conversion, so see this more as a hint rather than
+    //       the actual total length
+    auto total_length_hint = 0;
+    for (cpp11::r_string si : s) {
+      total_length_hint += (si == NA_STRING) ? 0 : si.size();

Review comment:
       Thanks. I wasn't aware of `is_na()`.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to