kou commented on code in PR #38632: URL: https://github.com/apache/arrow/pull/38632#discussion_r1387382042
########## cpp/src/gandiva/external_c_interface_functions.cc: ########## @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License + +#include "llvm/IR/Type.h" + +#include "gandiva/engine.h" +#include "gandiva/exported_funcs.h" + +namespace gandiva { +static arrow::Result<llvm::Type*> AsLLVMType(const DataTypePtr& from_type, + LLVMTypes* types) { + switch (from_type->id()) { + case arrow::Type::BOOL: + return types->i1_type(); + case arrow::Type::INT8: + case arrow::Type::UINT8: + return types->i8_type(); + case arrow::Type::INT16: + case arrow::Type::UINT16: + return types->i16_type(); + case arrow::Type::INT32: + case arrow::Type::UINT32: + return types->i32_type(); + case arrow::Type::INT64: + case arrow::Type::UINT64: + return types->i64_type(); + case arrow::Type::FLOAT: + return types->float_type(); + case arrow::Type::DOUBLE: + return types->double_type(); + default: + return Status::NotImplemented("Unsupported arrow data type: " + + from_type->ToString()); + } +} + +// map from a NativeFunction's signature to the corresponding LLVM signature +static arrow::Result<std::pair<std::vector<llvm::Type*>, llvm::Type*>> MapToLLVMSignature( + const FunctionSignature& sig, const NativeFunction& func, LLVMTypes* types) { + std::vector<llvm::Type*> args; + args.reserve(sig.param_types().size()); + if (func.NeedsContext()) { + args.emplace_back(types->i64_type()); + } + if (func.NeedsFunctionHolder()) { + args.emplace_back(types->i64_type()); + } + for (auto const& arg : sig.param_types()) { + if (arg->id() == arrow::Type::STRING) { + args.emplace_back(types->i8_ptr_type()); + args.emplace_back(types->i32_type()); + } else { + ARROW_ASSIGN_OR_RAISE(auto arg_llvm_type, AsLLVMType(arg, types)); + args.emplace_back(arg_llvm_type); + } + } + llvm::Type* ret_llvm_type; + if (sig.ret_type()->id() == arrow::Type::STRING) { + // for string output, the last arg is the output length + args.emplace_back(types->i32_ptr_type()); + ret_llvm_type = types->i8_ptr_type(); + } else { + ARROW_ASSIGN_OR_RAISE(ret_llvm_type, AsLLVMType(sig.ret_type(), types)); + } + auto return_type = AsLLVMType(sig.ret_type(), types); Review Comment: It seems that this isn't used. ########## cpp/src/gandiva/external_c_interface_functions.cc: ########## @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License + +#include "llvm/IR/Type.h" + +#include "gandiva/engine.h" +#include "gandiva/exported_funcs.h" + +namespace gandiva { +static arrow::Result<llvm::Type*> AsLLVMType(const DataTypePtr& from_type, + LLVMTypes* types) { + switch (from_type->id()) { + case arrow::Type::BOOL: + return types->i1_type(); + case arrow::Type::INT8: + case arrow::Type::UINT8: + return types->i8_type(); + case arrow::Type::INT16: + case arrow::Type::UINT16: + return types->i16_type(); + case arrow::Type::INT32: + case arrow::Type::UINT32: + return types->i32_type(); + case arrow::Type::INT64: + case arrow::Type::UINT64: + return types->i64_type(); + case arrow::Type::FLOAT: + return types->float_type(); + case arrow::Type::DOUBLE: + return types->double_type(); + default: + return Status::NotImplemented("Unsupported arrow data type: " + + from_type->ToString()); + } +} + +// map from a NativeFunction's signature to the corresponding LLVM signature +static arrow::Result<std::pair<std::vector<llvm::Type*>, llvm::Type*>> MapToLLVMSignature( + const FunctionSignature& sig, const NativeFunction& func, LLVMTypes* types) { + std::vector<llvm::Type*> args; + args.reserve(sig.param_types().size()); + if (func.NeedsContext()) { + args.emplace_back(types->i64_type()); + } + if (func.NeedsFunctionHolder()) { + args.emplace_back(types->i64_type()); + } + for (auto const& arg : sig.param_types()) { + if (arg->id() == arrow::Type::STRING) { + args.emplace_back(types->i8_ptr_type()); + args.emplace_back(types->i32_type()); + } else { + ARROW_ASSIGN_OR_RAISE(auto arg_llvm_type, AsLLVMType(arg, types)); + args.emplace_back(arg_llvm_type); + } + } + llvm::Type* ret_llvm_type; + if (sig.ret_type()->id() == arrow::Type::STRING) { + // for string output, the last arg is the output length + args.emplace_back(types->i32_ptr_type()); + ret_llvm_type = types->i8_ptr_type(); + } else { + ARROW_ASSIGN_OR_RAISE(ret_llvm_type, AsLLVMType(sig.ret_type(), types)); + } + auto return_type = AsLLVMType(sig.ret_type(), types); + return std::make_pair(args, ret_llvm_type); Review Comment: Do we need `std::move()` for `args` here? ```suggestion return std::make_pair(std::move(args), ret_llvm_type); ``` ########## cpp/src/gandiva/external_c_interface_functions.cc: ########## @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License + +#include "llvm/IR/Type.h" + +#include "gandiva/engine.h" +#include "gandiva/exported_funcs.h" + +namespace gandiva { +static arrow::Result<llvm::Type*> AsLLVMType(const DataTypePtr& from_type, + LLVMTypes* types) { + switch (from_type->id()) { + case arrow::Type::BOOL: + return types->i1_type(); + case arrow::Type::INT8: + case arrow::Type::UINT8: + return types->i8_type(); + case arrow::Type::INT16: + case arrow::Type::UINT16: + return types->i16_type(); + case arrow::Type::INT32: + case arrow::Type::UINT32: + return types->i32_type(); + case arrow::Type::INT64: + case arrow::Type::UINT64: + return types->i64_type(); + case arrow::Type::FLOAT: + return types->float_type(); + case arrow::Type::DOUBLE: + return types->double_type(); + default: + return Status::NotImplemented("Unsupported arrow data type: " + + from_type->ToString()); + } +} + +// map from a NativeFunction's signature to the corresponding LLVM signature +static arrow::Result<std::pair<std::vector<llvm::Type*>, llvm::Type*>> MapToLLVMSignature( + const FunctionSignature& sig, const NativeFunction& func, LLVMTypes* types) { + std::vector<llvm::Type*> args; Review Comment: `arg_types` or `arg_llvm_type` may be better because we use `ret_llvm_type` for a return type. ########## cpp/src/gandiva/tests/test_util.cc: ########## @@ -42,11 +46,129 @@ NativeFunction GetTestExternalFunction() { return multiply_by_two_func; } -std::shared_ptr<Configuration> TestConfigurationWithFunctionRegistry( +static NativeFunction GetTestExternalCInterfaceFunction() { + NativeFunction multiply_by_three_func( + "multiply_by_three", {}, {arrow::int32()}, arrow::int64(), + ResultNullableType::kResultNullIfNull, "multiply_by_three_int32"); + return multiply_by_three_func; +} + +static NativeFunction GetTestFunctionWithFunctionHolder() { + // the 2nd parameter is expected to be an int32 literal + NativeFunction multiply_by_n_func("multiply_by_n", {}, {arrow::int32(), arrow::int32()}, + arrow::int64(), ResultNullableType::kResultNullIfNull, + "multiply_by_n_int32_int32", + NativeFunction::kNeedsFunctionHolder); + return multiply_by_n_func; +} + +static NativeFunction GetTestFunctionWithContext() { + NativeFunction multiply_by_two_formula( + "multiply_by_two_formula", {}, {arrow::utf8()}, arrow::utf8(), + ResultNullableType::kResultNullIfNull, "multiply_by_two_formula_utf8", + NativeFunction::kNeedsContext); + return multiply_by_two_formula; +} + +static std::shared_ptr<Configuration> BuildConfigurationWithRegistry( + std::shared_ptr<FunctionRegistry> registry, + const std::function<arrow::Status(std::shared_ptr<FunctionRegistry>)>& + register_func) { + ARROW_EXPECT_OK(register_func(registry)); + return ConfigurationBuilder().build(std::move(registry)); +} + +std::shared_ptr<Configuration> TestConfigWithFunctionRegistry( + std::shared_ptr<FunctionRegistry> registry) { + return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) { + return reg->Register({GetTestExternalFunction()}, GetTestFunctionLLVMIRPath()); + }); +} + +class MultiplyHolder : public FunctionHolder { + public: + explicit MultiplyHolder(int32_t num) : num_(num) {} + + static Status Make(const FunctionNode& node, std::shared_ptr<MultiplyHolder>* holder) { Review Comment: How about using `Result` instead of `Status`? ```suggestion static Result<std::shared_ptr<MultiplyHolder>> Make(const FunctionNode& node) { ``` ########## cpp/src/gandiva/tests/test_util.cc: ########## @@ -42,11 +46,129 @@ NativeFunction GetTestExternalFunction() { return multiply_by_two_func; } -std::shared_ptr<Configuration> TestConfigurationWithFunctionRegistry( +static NativeFunction GetTestExternalCInterfaceFunction() { + NativeFunction multiply_by_three_func( + "multiply_by_three", {}, {arrow::int32()}, arrow::int64(), + ResultNullableType::kResultNullIfNull, "multiply_by_three_int32"); + return multiply_by_three_func; +} + +static NativeFunction GetTestFunctionWithFunctionHolder() { + // the 2nd parameter is expected to be an int32 literal + NativeFunction multiply_by_n_func("multiply_by_n", {}, {arrow::int32(), arrow::int32()}, + arrow::int64(), ResultNullableType::kResultNullIfNull, + "multiply_by_n_int32_int32", + NativeFunction::kNeedsFunctionHolder); + return multiply_by_n_func; +} + +static NativeFunction GetTestFunctionWithContext() { + NativeFunction multiply_by_two_formula( + "multiply_by_two_formula", {}, {arrow::utf8()}, arrow::utf8(), + ResultNullableType::kResultNullIfNull, "multiply_by_two_formula_utf8", + NativeFunction::kNeedsContext); + return multiply_by_two_formula; +} + +static std::shared_ptr<Configuration> BuildConfigurationWithRegistry( + std::shared_ptr<FunctionRegistry> registry, + const std::function<arrow::Status(std::shared_ptr<FunctionRegistry>)>& + register_func) { + ARROW_EXPECT_OK(register_func(registry)); + return ConfigurationBuilder().build(std::move(registry)); +} + +std::shared_ptr<Configuration> TestConfigWithFunctionRegistry( + std::shared_ptr<FunctionRegistry> registry) { + return BuildConfigurationWithRegistry(std::move(registry), [](auto reg) { + return reg->Register({GetTestExternalFunction()}, GetTestFunctionLLVMIRPath()); + }); +} + +class MultiplyHolder : public FunctionHolder { + public: + explicit MultiplyHolder(int32_t num) : num_(num) {} + + static Status Make(const FunctionNode& node, std::shared_ptr<MultiplyHolder>* holder) { + ARROW_RETURN_IF(node.children().size() != 2, + Status::Invalid("'multiply_by_n' function requires two parameters")); + + auto literal = dynamic_cast<LiteralNode*>(node.children().at(1).get()); + ARROW_RETURN_IF( + literal == nullptr, + Status::Invalid( + "'multiply_by_n' function requires a literal as the 2nd parameter")); + + auto literal_type = literal->return_type()->id(); + ARROW_RETURN_IF( + literal_type != arrow::Type::INT32, + Status::Invalid( + "'multiply_by_n' function requires an int32 literal as the 2nd parameter")); + + *holder = std::make_shared<MultiplyHolder>( + literal->is_null() ? 0 : std::get<int32_t>(literal->holder())); + return Status::OK(); + } + + int32_t operator()() const { return num_; } + + private: + int32_t num_; +}; + +extern "C" { +// this function is used as an external stub function for testing so it has to be declared Review Comment: ```suggestion // this function is used as an external C interface function for testing so it has to be declared ``` ########## cpp/src/gandiva/function_holder_maker_registry.cc: ########## @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "gandiva/function_holder_maker_registry.h" + +#include <functional> + +#include "gandiva/function_holder.h" +#include "gandiva/interval_holder.h" +#include "gandiva/random_generator_holder.h" +#include "gandiva/regex_functions_holder.h" +#include "gandiva/to_date_holder.h" + +namespace gandiva { + +FunctionHolderMakerRegistry::FunctionHolderMakerRegistry() + : function_holder_makers_(DefaultHolderMakers()) {} + +static std::string to_lower(const std::string& str) { + std::string data = str; + std::transform(data.begin(), data.end(), data.begin(), + [](unsigned char c) { return std::tolower(c); }); + return data; +} +arrow::Status FunctionHolderMakerRegistry::Register(const std::string& name, + FunctionHolderMaker holder_maker) { + function_holder_makers_.emplace(to_lower(name), std::move(holder_maker)); Review Comment: How about using existing `arrow::internal::AsciiToLower()`? ```suggestion function_holder_makers_.emplace(arrow::internal::AsciiToLower(name), std::move(holder_maker)); ``` ########## cpp/src/gandiva/function_registry.cc: ########## @@ -109,11 +109,35 @@ arrow::Status FunctionRegistry::Register(const std::vector<NativeFunction>& func return Status::OK(); } +arrow::Status FunctionRegistry::Register( + NativeFunction func, void* stub_function_ptr, Review Comment: ```suggestion NativeFunction func, void* c_interface_function_ptr, ``` ########## cpp/src/gandiva/gdv_string_function_stubs.cc: ########## @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//#pragma once +// #pragma once Review Comment: Can we remove this needless comment? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
