bkietz commented on a change in pull request #8365:
URL: https://github.com/apache/arrow/pull/8365#discussion_r551542391
##########
File path: r/src/array_to_vector.cpp
##########
@@ -288,36 +290,104 @@ struct Converter_String : public Converter {
}
StringArrayType* string_array = static_cast<StringArrayType*>(array.get());
- if (array->null_count()) {
- // need to watch for nulls
- arrow::internal::BitmapReader null_reader(array->null_bitmap_data(),
- array->offset(), n);
+
+ const bool all_valid = array->null_count() == 0;
+ const bool strip_out_nuls = GetBoolOption("arrow.skip_nul", false);
+
+ bool nul_was_stripped = false;
+
+ if (all_valid) {
+ // no need to watch for missing strings
cpp11::unwind_protect([&] {
- for (int i = 0; i < n; i++, null_reader.Next()) {
- if (null_reader.IsSet()) {
- SET_STRING_ELT(data, start + i,
r_string_from_view(string_array->GetView(i)));
- } else {
- SET_STRING_ELT(data, start + i, NA_STRING);
+ if (strip_out_nuls) {
+ for (int i = 0; i < n; i++) {
+ SET_STRING_ELT(data, start + i,
+
r_string_from_view_strip_nul(string_array->GetView(i),
+ &nul_was_stripped));
}
+ return;
+ }
+
+ for (int i = 0; i < n; i++) {
+ SET_STRING_ELT(data, start + i,
r_string_from_view(string_array->GetView(i)));
}
});
} else {
cpp11::unwind_protect([&] {
- for (int i = 0; i < n; i++) {
- SET_STRING_ELT(data, start + i,
r_string_from_view(string_array->GetView(i)));
+ arrow::internal::BitmapReader
validity_reader(array->null_bitmap_data(),
+ array->offset(), n);
+
+ if (strip_out_nuls) {
+ for (int i = 0; i < n; i++, validity_reader.Next()) {
+ if (validity_reader.IsSet()) {
+ SET_STRING_ELT(data, start + i,
+
r_string_from_view_strip_nul(string_array->GetView(i),
+ &nul_was_stripped));
+ } else {
+ SET_STRING_ELT(data, start + i, NA_STRING);
+ }
+ }
+ return;
+ }
+
+ for (int i = 0; i < n; i++, validity_reader.Next()) {
+ if (validity_reader.IsSet()) {
+ SET_STRING_ELT(data, start + i,
r_string_from_view(string_array->GetView(i)));
+ } else {
+ SET_STRING_ELT(data, start + i, NA_STRING);
+ }
}
});
}
+ if (nul_was_stripped) {
+ cpp11::warning("Stripping '\\0' (nul) from character vector");
+ }
+
return Status::OK();
}
bool Parallel() const { return false; }
private:
- static SEXP r_string_from_view(const arrow::util::string_view& view) {
+ static SEXP r_string_from_view(arrow::util::string_view view) {
Review comment:
string_view is a trivially copyable structure so there's no need to
protect it from copies with `const&`
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]