nealrichardson commented on a change in pull request #8365: URL: https://github.com/apache/arrow/pull/8365#discussion_r551519433
########## File path: r/src/array_to_vector.cpp ########## @@ -288,36 +290,104 @@ struct Converter_String : public Converter { } StringArrayType* string_array = static_cast<StringArrayType*>(array.get()); - if (array->null_count()) { - // need to watch for nulls - arrow::internal::BitmapReader null_reader(array->null_bitmap_data(), - array->offset(), n); + + const bool all_valid = array->null_count() == 0; + const bool strip_out_nuls = GetBoolOption("arrow.skip_nul", false); + + bool nul_was_stripped = false; + + if (all_valid) { + // no need to watch for missing strings cpp11::unwind_protect([&] { - for (int i = 0; i < n; i++, null_reader.Next()) { - if (null_reader.IsSet()) { - SET_STRING_ELT(data, start + i, r_string_from_view(string_array->GetView(i))); - } else { - SET_STRING_ELT(data, start + i, NA_STRING); + if (strip_out_nuls) { + for (int i = 0; i < n; i++) { + SET_STRING_ELT(data, start + i, + r_string_from_view_strip_nul(string_array->GetView(i), + &nul_was_stripped)); } + return; Review comment: The main function return (L347) is `return Status::OK();` -- should these do the same? Or do I misunderstand what this return does? ########## File path: r/src/array_to_vector.cpp ########## @@ -288,36 +290,104 @@ struct Converter_String : public Converter { } StringArrayType* string_array = static_cast<StringArrayType*>(array.get()); - if (array->null_count()) { - // need to watch for nulls - arrow::internal::BitmapReader null_reader(array->null_bitmap_data(), - array->offset(), n); + + const bool all_valid = array->null_count() == 0; + const bool strip_out_nuls = GetBoolOption("arrow.skip_nul", false); + + bool nul_was_stripped = false; + + if (all_valid) { + // no need to watch for missing strings cpp11::unwind_protect([&] { - for (int i = 0; i < n; i++, null_reader.Next()) { - if (null_reader.IsSet()) { - SET_STRING_ELT(data, start + i, r_string_from_view(string_array->GetView(i))); - } else { - SET_STRING_ELT(data, start + i, NA_STRING); + if (strip_out_nuls) { + for (int i = 0; i < n; i++) { + SET_STRING_ELT(data, start + i, + r_string_from_view_strip_nul(string_array->GetView(i), + &nul_was_stripped)); } + return; + } + + for (int i = 0; i < n; i++) { + SET_STRING_ELT(data, start + i, r_string_from_view(string_array->GetView(i))); } }); } else { cpp11::unwind_protect([&] { - for (int i = 0; i < n; i++) { - SET_STRING_ELT(data, start + i, r_string_from_view(string_array->GetView(i))); + arrow::internal::BitmapReader validity_reader(array->null_bitmap_data(), + array->offset(), n); + + if (strip_out_nuls) { + for (int i = 0; i < n; i++, validity_reader.Next()) { + if (validity_reader.IsSet()) { + SET_STRING_ELT(data, start + i, + r_string_from_view_strip_nul(string_array->GetView(i), + &nul_was_stripped)); + } else { + SET_STRING_ELT(data, start + i, NA_STRING); + } + } + return; + } + + for (int i = 0; i < n; i++, validity_reader.Next()) { + if (validity_reader.IsSet()) { + SET_STRING_ELT(data, start + i, r_string_from_view(string_array->GetView(i))); + } else { + SET_STRING_ELT(data, start + i, NA_STRING); + } } }); } + if (nul_was_stripped) { + cpp11::warning("Stripping '\\0' (nul) from character vector"); + } + return Status::OK(); } bool Parallel() const { return false; } private: - static SEXP r_string_from_view(const arrow::util::string_view& view) { + static SEXP r_string_from_view(arrow::util::string_view view) { Review comment: Why this change? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org