HappenLee commented on code in PR #58348: URL: https://github.com/apache/doris/pull/58348#discussion_r2575760607
########## be/src/udf/python/python_udtf_client.h: ########## @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <arrow/status.h> + +#include "arrow/flight/client.h" +#include "common/status.h" +#include "udf/python/python_udf_meta.h" +#include "udf/python/python_udf_runtime.h" +#include "util/arrow/utils.h" + +namespace doris { + +class PythonUDTFClient; + +using PythonUDTFClientPtr = std::shared_ptr<PythonUDTFClient>; + +/** + * Python UDTF Client + * + * Implements simplified UDTF (User-Defined Table Function): + * + * Handler Function: + * - evaluate_func(*args): Process input arguments and yield output rows + * + * UDTF Characteristics: + * - Takes scalar or table inputs + * - Returns table (multiple rows) + * - Simple yield pattern + * + * Example: + * ```python + * def evaluate_func(text, delimiter): + * # Split string by delimiter and return multiple results + * for item in text.split(delimiter): + * yield (item, ) + * ``` + * + * SQL Usage: + * ```sql + * SELECT * FROM TABLE(split_string('a,b,c', ',')); + * ``` + * + * Communication protocol with Python server: + * 1. Send input row batch to Python + * 2. Python calls evaluate_func() for each input row + * 3. 
Collect all output rows and return + */ +class PythonUDTFClient { +public: + using FlightDescriptor = arrow::flight::FlightDescriptor; + using FlightClient = arrow::flight::FlightClient; + using FlightStreamWriter = arrow::flight::FlightStreamWriter; + using FlightStreamReader = arrow::flight::FlightStreamReader; + + PythonUDTFClient() = default; + ~PythonUDTFClient() = default; + + static Status create(const PythonUDFMeta& func_meta, ProcessPtr process, + PythonUDTFClientPtr* client); + + Status init(const PythonUDFMeta& func_meta, ProcessPtr process); + + /** + * Evaluate UDTF on input rows + * + * Protocol (ListArray-based): + * Python server returns a RecordBatch with 1 column: + * - Column 0: ListArray where each list element corresponds to one input row's outputs + * + * Example: + * Input: 3 rows + * Output ListArray: + * [0]: [val1, val2, val3] (3 elements for input row 0) + * [1]: [] (0 elements for input row 1) + * [2]: [val4, val5, val6, val7] (4 elements for input row 2) + * + * @param input Input row batch (columns = UDTF function parameters) + * @param list_array Output ListArray (length = num_input_rows) + * @return Status + */ + Status evaluate(const arrow::RecordBatch& input, std::shared_ptr<arrow::ListArray>* list_array); + + Status close(); + + Status handle_error(arrow::Status status); + + std::string print_process() const { return _process->to_string(); } + +private: + DISALLOW_COPY_AND_ASSIGN(PythonUDTFClient); + + bool _inited = false; Review Comment: All the params in the class `PythonUDTFClient` are the same as those in `PythonUDFClient` and `PythonUDAFClient`. Maybe the three classes should share a common parent class, or maybe all three should use the same class. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
