pitrou commented on a change in pull request #10802: URL: https://github.com/apache/arrow/pull/10802#discussion_r689507158
########## File path: cpp/src/arrow/compute/kernels/vector_selection.cc ########## @@ -2146,6 +2147,219 @@ class TakeMetaFunction : public MetaFunction { } }; +// ---------------------------------------------------------------------- +// DropNull Implementation + +Status GetDropNullFilter(const Array& values, MemoryPool* memory_pool, + std::shared_ptr<arrow::BooleanArray>* out_array) { + auto bitmap_buffer = values.null_bitmap(); + *out_array = std::make_shared<BooleanArray>(values.length(), bitmap_buffer, nullptr, 0, + values.offset()); + return Status::OK(); +} + +Status CreateEmptyArray(std::shared_ptr<DataType> type, MemoryPool* memory_pool, + std::shared_ptr<Array>* output_array) { + std::unique_ptr<ArrayBuilder> builder; + RETURN_NOT_OK(MakeBuilder(memory_pool, type, &builder)); + RETURN_NOT_OK(builder->Resize(0)); + ARROW_ASSIGN_OR_RAISE(*output_array, builder->Finish()); + return Status::OK(); +} + +Status CreateEmptyChunkedArray(std::shared_ptr<DataType> type, MemoryPool* memory_pool, + std::shared_ptr<ChunkedArray>* output_array) { + std::vector<std::shared_ptr<Array>> new_chunks(1); // Hard-coded 1 for now + ARROW_RETURN_NOT_OK(CreateEmptyArray(type, memory_pool, &new_chunks[0])); + *output_array = std::make_shared<ChunkedArray>(std::move(new_chunks)); + return Status::OK(); +} + +Result<std::shared_ptr<Array>> DropNullArray(const std::shared_ptr<Array>& values, + ExecContext* ctx) { + if (values->null_count() == 0) { + return values; + } + if (values->type()->Equals(arrow::null())) { + return std::make_shared<NullArray>(0); + } + std::shared_ptr<BooleanArray> drop_null_filter; + RETURN_NOT_OK(GetDropNullFilter(*values, ctx->memory_pool(), &drop_null_filter)); + + if (drop_null_filter->null_count() == drop_null_filter->length()) { + std::shared_ptr<Array> empty_array; + RETURN_NOT_OK(CreateEmptyArray(values->type(), ctx->memory_pool(), &empty_array)); + return empty_array; + } + auto options = FilterOptions::Defaults(); + ARROW_ASSIGN_OR_RAISE( + Datum result, + CallFunction("array_filter", {Datum(*values), Datum(*drop_null_filter)}, &options, + ctx)); + return result.make_array(); +} + +Result<std::shared_ptr<ChunkedArray>> DropNullChunkedArray(const ChunkedArray& values, + ExecContext* ctx) { + if (values.null_count() == values.length()) { + std::shared_ptr<ChunkedArray> empty_array; + RETURN_NOT_OK( + CreateEmptyChunkedArray(values.type(), ctx->memory_pool(), &empty_array)); + return empty_array; + } + std::vector<std::shared_ptr<Array>> new_chunks; + for (const auto& chunk : values.chunks()) { + ARROW_ASSIGN_OR_RAISE(auto new_chunk, DropNullArray(chunk, ctx)); + if (new_chunk->length() > 0) { + new_chunks.push_back(new_chunk); + } + } + return std::make_shared<ChunkedArray>(std::move(new_chunks)); +} + +Result<std::shared_ptr<RecordBatch>> DropNullRecordBatch(const RecordBatch& batch, + ExecContext* ctx) { + int64_t null_count = 0; + for (const auto& column : batch.columns()) { + null_count += column->null_count(); + } + if (null_count == 0) { + return RecordBatch::Make(batch.schema(), batch.num_rows(), batch.columns()); Review comment: Ping here. If you pass this function a `const std::shared_ptr<RecordBatch>& batch` argument, you can just `return batch` here AFAICT. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org