Cerdore opened a new issue, #39024:
URL: https://github.com/apache/arrow/issues/39024

   ### Describe the usage question you have. Please include as many useful 
details as  possible.
   
   
   I have now inherited and implemented the ExtensionType , which storage type 
is int16(). 
   I want to use the ExtensionType's Array and Scalar to call a custom add 
function by CallFunction. The custom add function using the built-in Kernel for 
the two int16(). I also wrote a TypeMatcher to match the Kernel.
   
   Currently, two ExtensionArrays can be added together successfully, but there 
is an issue when adding an ExtensionArray and an ExtensionScalar; it throws a 
std::bad_cast error.
   
   After investigating, I found that the parent class of ExtensionScalar is 
Scalar, but it needs to be cast to PrimitiveScalarBase (whose parent class is 
also Scalar), which fails.
   
   
   ExtensionType:
   ```
   class SmallintTypecxs : public ExtensionType
   {
   public:
        SmallintTypecxs() : ExtensionType(int16()) {}
   
        std::string extension_name() const override { return "smallintcxs"; }
   
        bool ExtensionEquals(const ExtensionType &other) const override;
   
        std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const 
override;
   
        Result<std::shared_ptr<DataType>> Deserialize(
                std::shared_ptr<DataType> storage_type,
                const std::string &serialized) const override;
   
        std::string Serialize() const override { return "smallintcxs"; }
   
        const std::shared_ptr<DataType>& storage_type() const { return 
storage_type_; }
   private:
   };
   std::shared_ptr<DataType> smallintcxs() { return 
std::make_shared<SmallintTypecxs>(); }
   ```
   
   TypeMatcher
   ```
   //============ type matcher
   template <typename ArrowType>
   class ExtendMatcher : public TypeMatcher {
     using ThisType = ExtendMatcher<ArrowType>;
   
    public:
     explicit ExtendMatcher(Type::type oid)
         : type_id_(oid) {}
   
     bool Matches(const DataType& type) const override {
       if (type.id() != ArrowType::type_id) {
         return false;
       }
   
        const auto& ex_type = arrow::internal::checked_cast<const 
ArrowType&>(type);
   
        return ex_type.storage_type().get()->id() == type_id_;
     }
   
     bool Equals(const TypeMatcher& other) const override {
       if (this == &other) {
         return true;
       }
       auto casted = dynamic_cast<const ThisType*>(&other);
       if (casted == nullptr) {
         return false;
       }
       return this->type_id_ == casted->type_id_;
     }
   
     std::string ToString() const override {
       std::stringstream ss;
       ss << ArrowType::type_name() << "(" << type_id_
          << ")";
       return ss.str();
     }
   
    private:
        Type::type type_id_;
   };
   ```
   
   ```
   Status RegisterExtensionFunc()
   {
        auto registry = arrow::compute::GetFunctionRegistry();
        auto ty = smallintcxs();
   
        std::shared_ptr<Function> fc ;
        ARROW_ASSIGN_OR_RAISE(fc, registry->GetFunction("add"));
   
        std::vector<TypeHolder> args = {int16(), int16()};
   
        // auto kernel = func.DispatchExact(args);
        const Kernel* kernel;
        ARROW_ASSIGN_OR_RAISE(kernel, arrow::internal::checked_cast<const 
ScalarFunction&>(*fc)
                                .DispatchExact(args));
        
        const arrow::compute::ScalarKernel* derived_ptr = static_cast<const 
arrow::compute::ScalarKernel*>(kernel);
   
        std::cout<<derived_ptr->signature->out_type().ToString()<<std::endl;
   
           auto func = std::make_shared<ScalarFunction>("cxsadd", 
Arity::Binary(),
                                                    
/*doc=*/FunctionDoc::Empty());
   
           InputType in_type(SmallIntcxs(ty.get()->storage_id()));
   
           ARROW_RETURN_NOT_OK(func->AddKernel({in_type,in_type}, int16(), 
derived_ptr->exec)); 
        
        ARROW_RETURN_NOT_OK(registry->AddFunction(func));
   
        return Status::OK();
   }
   
   ```
   
   CallFunction
   ```
   arrow::Status testSmallIntType()
   {
   
        ARROW_RETURN_NOT_OK(arrow::compute::RegisterExtensionFunc());
   
        auto ary1 = arrow::SmallintArraycxsFromJSON("[30000, 100, 200, 1, 2]");
   
        auto ary2 = arrow::SmallintArraycxsFromJSON("[30000, 100, 200, 1, 2]");
        
        std::shared_ptr<arrow::Scalar> scalar1;
   
        ARROW_ASSIGN_OR_RAISE(scalar1, arrow::MakeScalar(arrow::smallintcxs(), 
1));
   
        arrow::Datum incremented_datum;
   
   
        //ARROW_ASSIGN_OR_RAISE(incremented_datum,
        //                                        
arrow::compute::CallFunction("cxsadd", {ary1, ary2})); // it's ok
   
        ARROW_ASSIGN_OR_RAISE(incremented_datum,
                                                  
arrow::compute::CallFunction("cxsadd", {ary1, scalar1})); // not work
        std::shared_ptr<Array> incremented_array = 
std::move(incremented_datum).make_array();
   
        auto resarr = 
std::static_pointer_cast<arrow::Int16Array>(incremented_array);
   
        std::cout<<"begin output"<<std::endl;
   
        for(int i = 0; i < resarr->length(); i++)
        {
                std::cout << static_cast<int64_t>(resarr->Value(i)) << 
std::endl;
        }
     return arrow::Status::OK();
   }
   ```
   When I use 'ARROW_ASSIGN_OR_RAISE(incremented_datum,
                                                  
arrow::compute::CallFunction("cxsadd", {ary1, scalar1})); ', program will abort 
and show this: 
   ```
   terminate called after throwing an instance of 'std::bad_cast'
     what():  std::bad_cast
   ```
   
   coredump file:
   
   ```
   #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
   #1  0x00007fd65aa73859 in __GI_abort () at abort.c:79
   #2  0x00007fd65ae708d1 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
   #3  0x00007fd65ae7c37c in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
   #4  0x00007fd65ae7c3e7 in std::terminate() () from 
/lib/x86_64-linux-gnu/libstdc++.so.6
   #5  0x00007fd65ae7c699 in __cxa_throw () from 
/lib/x86_64-linux-gnu/libstdc++.so.6
   #6  0x00007fd65ae7033c in __cxa_bad_cast () from 
/lib/x86_64-linux-gnu/libstdc++.so.6
   #7  0x0000559b33971f3f in 
arrow::internal::checked_cast<arrow::internal::PrimitiveScalarBase const&, 
arrow::Scalar const&> (value=...)
       at /home/gpadmin/gitRepo/arrow/cpp/src/arrow/util/checked_cast.h:38
   #8  0x0000559b3395fbae in 
arrow::compute::internal::UnboxScalar<arrow::Int16Type, void>::Unbox (val=...) 
at 
/home/gpadmin/gitRepo/arrow/cpp/src/arrow/compute/kernels/codegen_internal.h:341
   #9  0x0000559b33cd3331 in 
arrow::compute::internal::applicator::ScalarBinary<arrow::Int16Type, 
arrow::Int16Type, arrow::Int16Type, arrow::compute::internal::Add>::ArrayScalar 
(
       ctx=0x559b37531198, arg0=..., arg1=..., out=0x7fffb3211140) at 
/home/gpadmin/gitRepo/arrow/cpp/src/arrow/compute/kernels/codegen_internal.h:753
   #10 0x0000559b33cc0df8 in 
arrow::compute::internal::applicator::ScalarBinary<arrow::Int16Type, 
arrow::Int16Type, arrow::Int16Type, arrow::compute::internal::Add>::Exec 
(ctx=0x559b37531198,
       batch=..., out=0x7fffb3211140) at 
/home/gpadmin/gitRepo/arrow/cpp/src/arrow/compute/kernels/codegen_internal.h:780
   #11 0x0000559b33686c08 in arrow::compute::detail::(anonymous 
namespace)::ScalarExecutor::ExecuteSingleSpan (this=0x559b37531090, input=..., 
out=0x7fffb3211140)
       at /home/gpadmin/gitRepo/arrow/cpp/src/arrow/compute/exec.cc:891
   #12 0x0000559b3368665f in arrow::compute::detail::(anonymous 
namespace)::ScalarExecutor::ExecuteSpans (this=0x559b37531090, 
listener=0x7fffb3211350)
       at /home/gpadmin/gitRepo/arrow/cpp/src/arrow/compute/exec.cc:859
   #13 0x0000559b33686001 in arrow::compute::detail::(anonymous 
namespace)::ScalarExecutor::Execute (this=0x559b37531090, batch=..., 
listener=0x7fffb3211350)
       at /home/gpadmin/gitRepo/arrow/cpp/src/arrow/compute/exec.cc:808
   #14 0x0000559b336d267d in 
arrow::compute::detail::FunctionExecutorImpl::Execute (this=0x559b37531170, 
args=std::vector of length 2, capacity 2 = {...}, passed_length=-1)
       at /home/gpadmin/gitRepo/arrow/cpp/src/arrow/compute/function.cc:276
   #15 0x0000559b336d0262 in arrow::compute::(anonymous 
namespace)::ExecuteInternal (func=..., args=std::vector of length 2, capacity 2 
= {...}, passed_length=-1, options=0x0,
       ctx=0x559b359cf320 
<arrow::compute::default_exec_context()::default_ctx>) at 
/home/gpadmin/gitRepo/arrow/cpp/src/arrow/compute/function.cc:341
   #16 0x0000559b336d039e in arrow::compute::Function::Execute 
(this=0x559b37530270, args=std::vector of length 2, capacity 2 = {...}, 
options=0x0,
       ctx=0x559b359cf320 
<arrow::compute::default_exec_context()::default_ctx>) at 
/home/gpadmin/gitRepo/arrow/cpp/src/arrow/compute/function.cc:348
   #17 0x0000559b3368a320 in arrow::compute::CallFunction (func_name="cxsadd", 
args=std::vector of length 2, capacity 2 = {...}, options=0x0,
       ctx=0x559b359cf320 
<arrow::compute::default_exec_context()::default_ctx>) at 
/home/gpadmin/gitRepo/arrow/cpp/src/arrow/compute/exec.cc:1369
   #18 0x0000559b3368a3db in arrow::compute::CallFunction (func_name="cxsadd", 
args=std::vector of length 2, capacity 2 = {...}, ctx=0x0)
       at /home/gpadmin/gitRepo/arrow/cpp/src/arrow/compute/exec.cc:1374
   #19 0x0000559b332837d8 in testSmallIntType () at 
/home/gpadmin/testProjects/arrow_test/main.cpp:88
   #20 0x0000559b33283de8 in main () at 
/home/gpadmin/testProjects/arrow_test/main.cpp:140
   ```
   
   ### Component(s)
   
   C++


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to