HappenLee commented on code in PR #58993:
URL: https://github.com/apache/doris/pull/58993#discussion_r2658740683
##########
be/src/vec/aggregate_functions/aggregate_function_python_udaf.cpp:
##########
@@ -59,40 +61,83 @@ Status AggregatePythonUDAFData::add(int64_t place_id, const IColumn** columns,
     std::shared_ptr<arrow::Schema> schema;
     RETURN_IF_ERROR(
             get_arrow_schema_from_block(input_block, &schema, TimezoneUtils::default_time_zone));
-
-    std::shared_ptr<arrow::RecordBatch> batch;
     cctz::time_zone timezone_obj;
     TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, timezone_obj);
+
+    std::shared_ptr<arrow::RecordBatch> batch;
+    // Zero-copy: convert only the specified range
     RETURN_IF_ERROR(convert_to_arrow_batch(input_block, schema, arrow::default_memory_pool(),
-                                           &batch, timezone_obj));
+                                           &batch, timezone_obj, row_num_start, row_num_end));
+    // Send the batch (already sliced in convert_to_arrow_batch)
+    // Single place mode: no places column needed
+    RETURN_IF_ERROR(client->accumulate(place_id, true, *batch, 0, batch->num_rows()));
+    return Status::OK();
+}
+
+Status AggregatePythonUDAFData::add_batch(AggregateDataPtr* places, size_t place_offset,
+                                          size_t num_rows, const IColumn** columns,
+                                          const DataTypes& argument_types, size_t start,
+                                          size_t end) {
+    DCHECK(client) << "Client must be set before calling add_batch";
+    DCHECK(end > start) << "end must be greater than start";
+    DCHECK(end <= num_rows) << "end must not exceed num_rows";
+
+    size_t slice_rows = end - start;
+    Block input_block;
+    for (size_t i = 0; i < argument_types.size(); ++i) {
+        DCHECK(columns[i]->size() == num_rows) << "Column size must match num_rows";
+        input_block.insert(
+                ColumnWithTypeAndName(columns[i]->get_ptr(), argument_types[i], std::to_string(i)));
+    }
+
+    auto places_col = ColumnInt64::create(num_rows);
+    auto& places_data = places_col->get_data();
+    // Fill places column with place IDs for the slice [start, end)
+    for (size_t i = start; i < end; ++i) {
+        places_data[i] = reinterpret_cast<int64_t>(places[i] + place_offset);
Review Comment:
If each UDAF place has a different place id, maybe we end up with a hash table that is too big?
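
To make the concern concrete, here is a minimal standalone sketch (all names hypothetical, not Doris code): if every aggregate place is identified by its raw state pointer cast to `int64_t`, as in the loop above, the receiving side has to hold one map entry per distinct place, so a high-cardinality GROUP BY makes that table grow linearly with the number of groups. Whether this matters in practice depends on how the Python-side client actually stores per-place state, which this sketch only guesses at.

```cpp
#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for the Python-side UDAF client: it keys per-group
// state by the raw place pointer cast to int64_t, so nothing bounds the
// number of distinct keys except the number of groups.
class FakeUdafClient {
public:
    void accumulate(int64_t place_id) {
        // One hash-table entry per distinct place id.
        ++states_[place_id].row_count;
    }
    std::size_t num_places() const { return states_.size(); }

private:
    struct State {
        std::size_t row_count = 0;
    };
    std::unordered_map<int64_t, State> states_;
};

int main() {
    FakeUdafClient client;
    // Pretend each byte is a separate group's aggregate state, analogous to
    // `places[i] + place_offset` in the diff above.
    std::vector<char> group_states(1'000'000);
    for (char& s : group_states) {
        client.accumulate(reinterpret_cast<int64_t>(&s)); // distinct id per group
    }
    // The client now holds one entry per group: 1,000,000 keys.
    return client.num_places() == group_states.size() ? 0 : 1;
}
```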
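
Separately, as context for the `// Zero-copy: convert only the specified range` comment in the hunk above: Arrow's `RecordBatch::Slice` is the usual zero-copy primitive for this kind of range restriction, and the standalone sketch below (not Doris code) shows that a slice adjusts offset and length rather than copying buffers. Whether `convert_to_arrow_batch` uses exactly this mechanism internally is an assumption.

```cpp
#include <arrow/api.h>
#include <iostream>
#include <memory>

int main() {
    // Build a single int64 column with 100 rows.
    arrow::Int64Builder builder;
    for (int64_t v = 0; v < 100; ++v) {
        if (!builder.Append(v).ok()) return 1;
    }
    std::shared_ptr<arrow::Array> array;
    if (!builder.Finish(&array).ok()) return 1;

    auto schema = arrow::schema({arrow::field("col", arrow::int64())});
    auto batch = arrow::RecordBatch::Make(schema, array->length(), {array});

    // Restrict to rows [10, 30): the slice shares the parent's buffers;
    // only the offset and length change, no row data is copied.
    auto slice = batch->Slice(/*offset=*/10, /*length=*/20);
    std::cout << slice->num_rows() << std::endl; // prints 20
    return 0;
}
```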