aocsa commented on a change in pull request #10802: URL: https://github.com/apache/arrow/pull/10802#discussion_r688602178
##########
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##########
@@ -2146,6 +2147,219 @@ class TakeMetaFunction : public MetaFunction {
   }
 };
 
+// ----------------------------------------------------------------------
+// DropNull Implementation
+
+Status GetDropNullFilter(const Array& values, MemoryPool* memory_pool,
+                         std::shared_ptr<arrow::BooleanArray>* out_array) {
+  auto bitmap_buffer = values.null_bitmap();
+  *out_array = std::make_shared<BooleanArray>(values.length(), bitmap_buffer, nullptr, 0,
+                                              values.offset());
+  return Status::OK();
+}
+
+Status CreateEmptyArray(std::shared_ptr<DataType> type, MemoryPool* memory_pool,
+                        std::shared_ptr<Array>* output_array) {
+  std::unique_ptr<ArrayBuilder> builder;
+  RETURN_NOT_OK(MakeBuilder(memory_pool, type, &builder));
+  RETURN_NOT_OK(builder->Resize(0));
+  ARROW_ASSIGN_OR_RAISE(*output_array, builder->Finish());
+  return Status::OK();
+}
+
+Status CreateEmptyChunkedArray(std::shared_ptr<DataType> type, MemoryPool* memory_pool,
+                               std::shared_ptr<ChunkedArray>* output_array) {
+  std::vector<std::shared_ptr<Array>> new_chunks(1);  // Hard-coded 1 for now
+  ARROW_RETURN_NOT_OK(CreateEmptyArray(type, memory_pool, &new_chunks[0]));
+  *output_array = std::make_shared<ChunkedArray>(std::move(new_chunks));
+  return Status::OK();
+}
+
+Result<std::shared_ptr<Array>> DropNullArray(const std::shared_ptr<Array>& values,
+                                             ExecContext* ctx) {
+  if (values->null_count() == 0) {
+    return values;
+  }
+  if (values->type()->Equals(arrow::null())) {
+    return std::make_shared<NullArray>(0);
+  }
+  std::shared_ptr<BooleanArray> drop_null_filter;
+  RETURN_NOT_OK(GetDropNullFilter(*values, ctx->memory_pool(), &drop_null_filter));
+
+  if (drop_null_filter->null_count() == drop_null_filter->length()) {
+    std::shared_ptr<Array> empty_array;
+    RETURN_NOT_OK(CreateEmptyArray(values->type(), ctx->memory_pool(), &empty_array));
+    return empty_array;
+  }
+  auto options = FilterOptions::Defaults();
+  ARROW_ASSIGN_OR_RAISE(
+      Datum result,
+      CallFunction("array_filter", {Datum(*values), Datum(*drop_null_filter)}, &options,
+                   ctx));
+  return result.make_array();
+}
+
+Result<std::shared_ptr<ChunkedArray>> DropNullChunkedArray(const ChunkedArray& values,
+                                                           ExecContext* ctx) {
+  if (values.null_count() == values.length()) {
+    std::shared_ptr<ChunkedArray> empty_array;
+    RETURN_NOT_OK(
+        CreateEmptyChunkedArray(values.type(), ctx->memory_pool(), &empty_array));
+    return empty_array;
+  }

Review comment:
Yep, https://github.com/apache/arrow/blob/2675cb94ca26126f21a93ed596347545f1e72928/cpp/src/arrow/compute/kernels/vector_selection.cc#L2181

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org