nealrichardson commented on a change in pull request #8365:
URL: https://github.com/apache/arrow/pull/8365#discussion_r551519433



##########
File path: r/src/array_to_vector.cpp
##########
@@ -288,36 +290,104 @@ struct Converter_String : public Converter {
     }
 
     StringArrayType* string_array = static_cast<StringArrayType*>(array.get());
-    if (array->null_count()) {
-      // need to watch for nulls
-      arrow::internal::BitmapReader null_reader(array->null_bitmap_data(),
-                                                array->offset(), n);
+
+    const bool all_valid = array->null_count() == 0;
+    const bool strip_out_nuls = GetBoolOption("arrow.skip_nul", false);
+
+    bool nul_was_stripped = false;
+
+    if (all_valid) {
+      // no need to watch for missing strings
       cpp11::unwind_protect([&] {
-        for (int i = 0; i < n; i++, null_reader.Next()) {
-          if (null_reader.IsSet()) {
-            SET_STRING_ELT(data, start + i, 
r_string_from_view(string_array->GetView(i)));
-          } else {
-            SET_STRING_ELT(data, start + i, NA_STRING);
+        if (strip_out_nuls) {
+          for (int i = 0; i < n; i++) {
+            SET_STRING_ELT(data, start + i,
+                           
r_string_from_view_strip_nul(string_array->GetView(i),
+                                                        &nul_was_stripped));
           }
+          return;

Review comment:
       The main function return (L347) is `return Status::OK();` -- should 
these do the same? Or do I misunderstand what this return does?

##########
File path: r/src/array_to_vector.cpp
##########
@@ -288,36 +290,104 @@ struct Converter_String : public Converter {
     }
 
     StringArrayType* string_array = static_cast<StringArrayType*>(array.get());
-    if (array->null_count()) {
-      // need to watch for nulls
-      arrow::internal::BitmapReader null_reader(array->null_bitmap_data(),
-                                                array->offset(), n);
+
+    const bool all_valid = array->null_count() == 0;
+    const bool strip_out_nuls = GetBoolOption("arrow.skip_nul", false);
+
+    bool nul_was_stripped = false;
+
+    if (all_valid) {
+      // no need to watch for missing strings
       cpp11::unwind_protect([&] {
-        for (int i = 0; i < n; i++, null_reader.Next()) {
-          if (null_reader.IsSet()) {
-            SET_STRING_ELT(data, start + i, 
r_string_from_view(string_array->GetView(i)));
-          } else {
-            SET_STRING_ELT(data, start + i, NA_STRING);
+        if (strip_out_nuls) {
+          for (int i = 0; i < n; i++) {
+            SET_STRING_ELT(data, start + i,
+                           
r_string_from_view_strip_nul(string_array->GetView(i),
+                                                        &nul_was_stripped));
           }
+          return;
+        }
+
+        for (int i = 0; i < n; i++) {
+          SET_STRING_ELT(data, start + i, 
r_string_from_view(string_array->GetView(i)));
         }
       });
     } else {
       cpp11::unwind_protect([&] {
-        for (int i = 0; i < n; i++) {
-          SET_STRING_ELT(data, start + i, 
r_string_from_view(string_array->GetView(i)));
+        arrow::internal::BitmapReader 
validity_reader(array->null_bitmap_data(),
+                                                      array->offset(), n);
+
+        if (strip_out_nuls) {
+          for (int i = 0; i < n; i++, validity_reader.Next()) {
+            if (validity_reader.IsSet()) {
+              SET_STRING_ELT(data, start + i,
+                             
r_string_from_view_strip_nul(string_array->GetView(i),
+                                                          &nul_was_stripped));
+            } else {
+              SET_STRING_ELT(data, start + i, NA_STRING);
+            }
+          }
+          return;
+        }
+
+        for (int i = 0; i < n; i++, validity_reader.Next()) {
+          if (validity_reader.IsSet()) {
+            SET_STRING_ELT(data, start + i, 
r_string_from_view(string_array->GetView(i)));
+          } else {
+            SET_STRING_ELT(data, start + i, NA_STRING);
+          }
         }
       });
     }
 
+    if (nul_was_stripped) {
+      cpp11::warning("Stripping '\\0' (nul) from character vector");
+    }
+
     return Status::OK();
   }
 
   bool Parallel() const { return false; }
 
  private:
-  static SEXP r_string_from_view(const arrow::util::string_view& view) {
+  static SEXP r_string_from_view(arrow::util::string_view view) {

Review comment:
       Why this change?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to