wesm commented on a change in pull request #7240:
URL: https://github.com/apache/arrow/pull/7240#discussion_r429031277



##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -0,0 +1,710 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/scalar.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/string_view.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::BitmapReader;
+using internal::FirstTimeBitmapWriter;
+using internal::GenerateBitsUnrolled;
+
+namespace compute {
+
+#ifdef ARROW_EXTRA_ERROR_CONTEXT
+
+#define KERNEL_RETURN_IF_ERROR(ctx, expr)             \
+  do {                                                \
+    Status _st = (expr);                              \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) {             \
+      _st.AddContextLine(__FILE__, __LINE__, #expr);  \
+      ctx->SetStatus(_st);                            \
+      return;                                         \
+    }                                                 \
+  } while (0)
+
+#else
+
+#define KERNEL_RETURN_IF_ERROR(ctx, expr)       \
+  do {                                          \
+    Status _st = (expr);                        \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) {       \
+      ctx->SetStatus(_st);                      \
+      return;                                   \
+    }                                           \
+  } while (0)
+
+#endif  // ARROW_EXTRA_ERROR_CONTEXT
+
+// A kernel that exposes Call methods that handles iteration over ArrayData
+// inputs itself
+//
+
+constexpr int kValidity = 0;
+constexpr int kBinaryOffsets = 1;
+constexpr int kPrimitiveData = 1;
+constexpr int kBinaryData = 2;
+
+// ----------------------------------------------------------------------
+// Iteration / value access utilities
+
+template <typename T, typename R = void>
+using enable_if_has_c_type_not_boolean = enable_if_t<has_c_type<T>::value &&
+                                                     !is_boolean_type<T>::value, R>;
+
+template <typename Type, typename Enable = void>
+struct ArrayIterator;
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_has_c_type_not_boolean<Type>> {
+  using T = typename Type::c_type;
+  const T* values;
+  ArrayIterator(const ArrayData& data) : values(data.GetValues<T>(1)) {}
+  T operator()() { return *values++; }
+};
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_boolean<Type>> {
+  BitmapReader reader;
+  ArrayIterator(const ArrayData& data)
+      : reader(data.buffers[1]->data(), data.offset, data.length) {}
+  bool operator()() {
+    bool out = reader.IsSet();
+    reader.Next();
+    return out;
+  }
+};
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_base_binary<Type>> {
+  int64_t position = 0;
+  typename TypeTraits<Type>::ArrayType arr;
+  ArrayIterator(const ArrayData& data)
+      : arr(data.Copy()) {}
+  util::string_view operator()() { return arr.GetView(position++); }
+};
+
+template <typename Type, typename Enable = void>
+struct UnboxScalar;
+
+template <typename Type>
+struct UnboxScalar<Type, enable_if_has_c_type<Type>> {
+  using ScalarType = typename TypeTraits<Type>::ScalarType;
+  static typename Type::c_type Unbox(const Datum& datum) {
+    return datum.scalar_as<ScalarType>().value;
+  }
+};
+
+template <typename Type>
+struct UnboxScalar<Type, enable_if_base_binary<Type>> {
+  static util::string_view Unbox(const Datum& datum) {
+    return util::string_view(*datum.scalar_as<BaseBinaryScalar>().value);
+  }
+};
+
+template <typename Type, typename Enable = void>
+struct GetValueType;
+
+template <typename Type>
+struct GetValueType<Type, enable_if_has_c_type<Type>> {
+  using T = typename Type::c_type;
+};
+
+template <typename Type>
+struct GetValueType<
+    Type, enable_if_t<is_base_binary_type<Type>::value || is_decimal_type<Type>::value ||
+                      is_fixed_size_binary_type<Type>::value>> {
+  using T = util::string_view;
+};
+
+// ----------------------------------------------------------------------
+// Reusable type resolvers
+
+Result<ValueDescr> FirstType(KernelContext*, const std::vector<ValueDescr>& descrs);
+
+// ----------------------------------------------------------------------
+// Generate an array kernel given template classes
+
+void ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+
+void BinaryExecFlipped(KernelContext* ctx, ArrayKernelExec exec,
+                       const ExecBatch& batch, Datum* out);
+
+// ----------------------------------------------------------------------
+// Template kernel exec function generators

Review comment:
       fix comment (the "Template kernel exec function generators" section header directly above)

##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -0,0 +1,710 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/scalar.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/string_view.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::BitmapReader;
+using internal::FirstTimeBitmapWriter;
+using internal::GenerateBitsUnrolled;
+
+namespace compute {
+
+#ifdef ARROW_EXTRA_ERROR_CONTEXT
+
+#define KERNEL_RETURN_IF_ERROR(ctx, expr)             \
+  do {                                                \
+    Status _st = (expr);                              \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) {             \
+      _st.AddContextLine(__FILE__, __LINE__, #expr);  \
+      ctx->SetStatus(_st);                            \
+      return;                                         \
+    }                                                 \
+  } while (0)
+
+#else
+
+#define KERNEL_RETURN_IF_ERROR(ctx, expr)       \
+  do {                                          \
+    Status _st = (expr);                        \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) {       \
+      ctx->SetStatus(_st);                      \
+      return;                                   \
+    }                                           \
+  } while (0)
+
+#endif  // ARROW_EXTRA_ERROR_CONTEXT
+
+// A kernel that exposes Call methods that handles iteration over ArrayData
+// inputs itself
+//
+
+constexpr int kValidity = 0;
+constexpr int kBinaryOffsets = 1;
+constexpr int kPrimitiveData = 1;
+constexpr int kBinaryData = 2;
+
+// ----------------------------------------------------------------------
+// Iteration / value access utilities
+
+template <typename T, typename R = void>
+using enable_if_has_c_type_not_boolean = enable_if_t<has_c_type<T>::value &&
+                                                     !is_boolean_type<T>::value, R>;
+
+template <typename Type, typename Enable = void>
+struct ArrayIterator;
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_has_c_type_not_boolean<Type>> {
+  using T = typename Type::c_type;
+  const T* values;
+  ArrayIterator(const ArrayData& data) : values(data.GetValues<T>(1)) {}
+  T operator()() { return *values++; }
+};
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_boolean<Type>> {
+  BitmapReader reader;
+  ArrayIterator(const ArrayData& data)
+      : reader(data.buffers[1]->data(), data.offset, data.length) {}
+  bool operator()() {
+    bool out = reader.IsSet();
+    reader.Next();
+    return out;
+  }
+};
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_base_binary<Type>> {
+  int64_t position = 0;
+  typename TypeTraits<Type>::ArrayType arr;
+  ArrayIterator(const ArrayData& data)
+      : arr(data.Copy()) {}
+  util::string_view operator()() { return arr.GetView(position++); }
+};
+
+template <typename Type, typename Enable = void>
+struct UnboxScalar;
+
+template <typename Type>
+struct UnboxScalar<Type, enable_if_has_c_type<Type>> {
+  using ScalarType = typename TypeTraits<Type>::ScalarType;
+  static typename Type::c_type Unbox(const Datum& datum) {
+    return datum.scalar_as<ScalarType>().value;
+  }
+};
+
+template <typename Type>
+struct UnboxScalar<Type, enable_if_base_binary<Type>> {
+  static util::string_view Unbox(const Datum& datum) {
+    return util::string_view(*datum.scalar_as<BaseBinaryScalar>().value);
+  }
+};
+
+template <typename Type, typename Enable = void>
+struct GetValueType;
+
+template <typename Type>
+struct GetValueType<Type, enable_if_has_c_type<Type>> {
+  using T = typename Type::c_type;
+};
+
+template <typename Type>
+struct GetValueType<
+    Type, enable_if_t<is_base_binary_type<Type>::value || is_decimal_type<Type>::value ||
+                      is_fixed_size_binary_type<Type>::value>> {
+  using T = util::string_view;
+};
+
+// ----------------------------------------------------------------------
+// Reusable type resolvers
+
+Result<ValueDescr> FirstType(KernelContext*, const std::vector<ValueDescr>& descrs);
+
+// ----------------------------------------------------------------------
+// Generate an array kernel given template classes
+
+void ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+
+void BinaryExecFlipped(KernelContext* ctx, ArrayKernelExec exec,
+                       const ExecBatch& batch, Datum* out);
+
+// ----------------------------------------------------------------------
+// Template kernel exec function generators
+
+template <typename T>
+void Extend(const std::vector<T>& values, std::vector<T>* out) {
+  for (const auto& t : values) {
+    out->push_back(t);
+  }
+}
+
+const std::vector<std::shared_ptr<DataType>>& BaseBinaryTypes();
+const std::vector<std::shared_ptr<DataType>>& SignedIntTypes();
+const std::vector<std::shared_ptr<DataType>>& UnsignedIntTypes();
+const std::vector<std::shared_ptr<DataType>>& IntTypes();
+const std::vector<std::shared_ptr<DataType>>& FloatingPointTypes();
+
+// Number types without boolean
+const std::vector<std::shared_ptr<DataType>>& NumericTypes();
+
+// Temporal types including time and timestamps for each unit
+const std::vector<std::shared_ptr<DataType>>& TemporalTypes();
+
+// Integer, floating point, base binary, and temporal
+const std::vector<std::shared_ptr<DataType>>& PrimitiveTypes();
+
+namespace codegen {
+
+// Generate an ArrayKernelExec given a functor that handles all of its own
+// iteration, etc.
+//
+// Operator must implement
+//
+// static void Call(KernelContext*, const ArrayData& in, ArrayData* out)
+template <typename Operator>
+void SimpleUnary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  if (batch[0].kind() == Datum::SCALAR) {
+    ctx->SetStatus(Status::NotImplemented("NYI"));
+  } else if (batch.length > 0) {
+    Operator::Call(ctx, *batch[0].array(), out->mutable_array());
+  }
+}
+
+// Generate an ArrayKernelExec given a functor that handles all of its own
+// iteration, etc.
+//
+// Operator must implement
+//
+// static void Call(KernelContext*, const ArrayData& arg0, const ArrayData& arg1,
+//                  ArrayData* out)
+template <typename Operator>
+void SimpleBinary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  if (batch[0].kind() == Datum::SCALAR || batch[1].kind() == Datum::SCALAR) {
+    ctx->SetStatus(Status::NotImplemented("NYI"));
+  } else if (batch.length > 0) {
+    Operator::Call(ctx, *batch[0].array(), *batch[1].array(), out->mutable_array());
+  }
+}
+
+// An ArrayKernelExec-creation template that iterates over primitive non-boolean
+// inputs and writes into non-boolean primitive outputs.
+//
+// It may be possible to create a more generic template that can deal with any
+// input writing to any output, but we will need to write benchmarks to
+// investigate that on all compiler targets to ensure that the additional
+// template abstractions do not incur performance overhead. This template
+// provides a reference point for performance when there are no templates
+// dealing with value iteration.
+//
+// TODO: Run benchmarks to determine if OutputAdapter is a zero-cost abstraction
+struct ScalarPrimitiveExec {
+  template <typename Op, typename OutType, typename Arg0Type>
+  static void Unary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    using OUT = typename OutType::c_type;
+    using ARG0 = typename Arg0Type::c_type;
+
+    if (batch[0].kind() == Datum::SCALAR) {
+      ctx->SetStatus(Status::NotImplemented("NYI"));
+    } else {
+      ArrayData* out_arr = out->mutable_array();
+      auto out_data = out_arr->GetMutableValues<OUT>(kPrimitiveData);
+      auto arg0_data = batch[0].array()->GetValues<ARG0>(kPrimitiveData);
+      for (int64_t i = 0; i < batch.length; ++i) {
+        *out_data++ = Op::template Call<OUT, ARG0>(ctx, *arg0_data++);
+      }
+    }
+  }
+
+  template <typename Op, typename OutType, typename Arg0Type, typename Arg1Type>
+  static void Binary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    using OUT = typename OutType::c_type;
+    using ARG0 = typename Arg0Type::c_type;
+    using ARG1 = typename Arg1Type::c_type;
+
+    if (batch[0].kind() == Datum::SCALAR || batch[1].kind() == Datum::SCALAR) {
+      ctx->SetStatus(Status::NotImplemented("NYI"));
+    } else {
+      ArrayData* out_arr = out->mutable_array();
+      auto out_data = out_arr->GetMutableValues<OUT>(kPrimitiveData);
+      auto arg0_data = batch[0].array()->GetValues<ARG0>(kPrimitiveData);
+      auto arg1_data = batch[1].array()->GetValues<ARG1>(kPrimitiveData);
+      for (int64_t i = 0; i < batch.length; ++i) {
+        *out_data++ = Op::template Call<OUT, ARG0, ARG1>(ctx, *arg0_data++, *arg1_data++);
+      }
+    }
+  }
+};
+
+template <typename Type, typename Enable = void>
+struct OutputAdapter;
+
+template <typename Type>
+struct OutputAdapter<Type, enable_if_boolean<Type>> {
+  template <typename Generator>
+  static void Write(KernelContext*, Datum* out, Generator&& generator) {
+    ArrayData* out_arr = out->mutable_array();
+    auto out_bitmap = out_arr->buffers[1]->mutable_data();
+    GenerateBitsUnrolled(out_bitmap, out_arr->offset, out_arr->length,
+                         std::forward<Generator>(generator));
+  }
+};
+
+template <typename Type>
+struct OutputAdapter<Type, enable_if_has_c_type_not_boolean<Type>> {
+  template <typename Generator>
+  static void Write(KernelContext*, Datum* out, Generator&& generator) {
+    ArrayData* out_arr = out->mutable_array();
+    auto out_data = out_arr->GetMutableValues<typename Type::c_type>(kPrimitiveData);
+    // TODO: Is this as fast as a more explicitly inlined function?
+    for (int64_t i = 0 ; i < out_arr->length; ++i) {
+      *out_data++ = generator();
+    }
+  }
+};
+
+template <typename Type>
+struct OutputAdapter<Type, enable_if_base_binary<Type>> {
+  template <typename Generator>
+  static void Write(KernelContext* ctx, Datum* out, Generator&& generator) {
+    ctx->SetStatus(Status::NotImplemented("NYI"));
+  }
+};
+
+// A kernel exec generator for unary functions that addresses both array and
+// scalar inputs and dispatches input iteration and output writing to other
+// templates
+//
+// This template executes the operator even on the data behind null values,
+// therefore it is generally only suitable for operators that cannot fail.
+template <typename OutType, typename Arg0Type, typename Op>

Review comment:
       Add an example functor definition to the comment above this template

##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -0,0 +1,710 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/scalar.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/string_view.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::BitmapReader;
+using internal::FirstTimeBitmapWriter;
+using internal::GenerateBitsUnrolled;
+
+namespace compute {
+
+#ifdef ARROW_EXTRA_ERROR_CONTEXT
+
+#define KERNEL_RETURN_IF_ERROR(ctx, expr)             \
+  do {                                                \
+    Status _st = (expr);                              \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) {             \
+      _st.AddContextLine(__FILE__, __LINE__, #expr);  \
+      ctx->SetStatus(_st);                            \
+      return;                                         \
+    }                                                 \
+  } while (0)
+
+#else
+
+#define KERNEL_RETURN_IF_ERROR(ctx, expr)       \
+  do {                                          \
+    Status _st = (expr);                        \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) {       \
+      ctx->SetStatus(_st);                      \
+      return;                                   \
+    }                                           \
+  } while (0)
+
+#endif  // ARROW_EXTRA_ERROR_CONTEXT
+
+// A kernel that exposes Call methods that handles iteration over ArrayData
+// inputs itself
+//
+
+constexpr int kValidity = 0;
+constexpr int kBinaryOffsets = 1;
+constexpr int kPrimitiveData = 1;
+constexpr int kBinaryData = 2;
+
+// ----------------------------------------------------------------------
+// Iteration / value access utilities
+
+template <typename T, typename R = void>
+using enable_if_has_c_type_not_boolean = enable_if_t<has_c_type<T>::value &&
+                                                     !is_boolean_type<T>::value, R>;
+
+template <typename Type, typename Enable = void>
+struct ArrayIterator;
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_has_c_type_not_boolean<Type>> {
+  using T = typename Type::c_type;
+  const T* values;
+  ArrayIterator(const ArrayData& data) : values(data.GetValues<T>(1)) {}
+  T operator()() { return *values++; }
+};
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_boolean<Type>> {
+  BitmapReader reader;
+  ArrayIterator(const ArrayData& data)
+      : reader(data.buffers[1]->data(), data.offset, data.length) {}
+  bool operator()() {
+    bool out = reader.IsSet();
+    reader.Next();
+    return out;
+  }
+};
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_base_binary<Type>> {
+  int64_t position = 0;
+  typename TypeTraits<Type>::ArrayType arr;
+  ArrayIterator(const ArrayData& data)
+      : arr(data.Copy()) {}
+  util::string_view operator()() { return arr.GetView(position++); }
+};
+
+template <typename Type, typename Enable = void>
+struct UnboxScalar;
+
+template <typename Type>
+struct UnboxScalar<Type, enable_if_has_c_type<Type>> {
+  using ScalarType = typename TypeTraits<Type>::ScalarType;
+  static typename Type::c_type Unbox(const Datum& datum) {
+    return datum.scalar_as<ScalarType>().value;
+  }
+};
+
+template <typename Type>
+struct UnboxScalar<Type, enable_if_base_binary<Type>> {
+  static util::string_view Unbox(const Datum& datum) {
+    return util::string_view(*datum.scalar_as<BaseBinaryScalar>().value);
+  }
+};
+
+template <typename Type, typename Enable = void>
+struct GetValueType;
+
+template <typename Type>
+struct GetValueType<Type, enable_if_has_c_type<Type>> {
+  using T = typename Type::c_type;
+};
+
+template <typename Type>
+struct GetValueType<
+    Type, enable_if_t<is_base_binary_type<Type>::value || is_decimal_type<Type>::value ||
+                      is_fixed_size_binary_type<Type>::value>> {
+  using T = util::string_view;
+};
+
+// ----------------------------------------------------------------------
+// Reusable type resolvers
+
+Result<ValueDescr> FirstType(KernelContext*, const std::vector<ValueDescr>& descrs);
+
+// ----------------------------------------------------------------------
+// Generate an array kernel given template classes
+
+void ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+
+void BinaryExecFlipped(KernelContext* ctx, ArrayKernelExec exec,
+                       const ExecBatch& batch, Datum* out);
+
+// ----------------------------------------------------------------------
+// Template kernel exec function generators
+
+template <typename T>
+void Extend(const std::vector<T>& values, std::vector<T>* out) {
+  for (const auto& t : values) {
+    out->push_back(t);
+  }
+}
+
+const std::vector<std::shared_ptr<DataType>>& BaseBinaryTypes();
+const std::vector<std::shared_ptr<DataType>>& SignedIntTypes();
+const std::vector<std::shared_ptr<DataType>>& UnsignedIntTypes();
+const std::vector<std::shared_ptr<DataType>>& IntTypes();
+const std::vector<std::shared_ptr<DataType>>& FloatingPointTypes();
+
+// Number types without boolean
+const std::vector<std::shared_ptr<DataType>>& NumericTypes();
+
+// Temporal types including time and timestamps for each unit
+const std::vector<std::shared_ptr<DataType>>& TemporalTypes();
+
+// Integer, floating point, base binary, and temporal
+const std::vector<std::shared_ptr<DataType>>& PrimitiveTypes();
+
+namespace codegen {
+
+// Generate an ArrayKernelExec given a functor that handles all of its own
+// iteration, etc.
+//
+// Operator must implement
+//
+// static void Call(KernelContext*, const ArrayData& in, ArrayData* out)
+template <typename Operator>
+void SimpleUnary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  if (batch[0].kind() == Datum::SCALAR) {
+    ctx->SetStatus(Status::NotImplemented("NYI"));
+  } else if (batch.length > 0) {
+    Operator::Call(ctx, *batch[0].array(), out->mutable_array());
+  }
+}
+
+// Generate an ArrayKernelExec given a functor that handles all of its own
+// iteration, etc.
+//
+// Operator must implement
+//
+// static void Call(KernelContext*, const ArrayData& arg0, const ArrayData& arg1,
+//                  ArrayData* out)
+template <typename Operator>
+void SimpleBinary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  if (batch[0].kind() == Datum::SCALAR || batch[1].kind() == Datum::SCALAR) {
+    ctx->SetStatus(Status::NotImplemented("NYI"));
+  } else if (batch.length > 0) {
+    Operator::Call(ctx, *batch[0].array(), *batch[1].array(), out->mutable_array());
+  }
+}
+
+// An ArrayKernelExec-creation template that iterates over primitive non-boolean
+// inputs and writes into non-boolean primitive outputs.
+//
+// It may be possible to create a more generic template that can deal with any
+// input writing to any output, but we will need to write benchmarks to
+// investigate that on all compiler targets to ensure that the additional
+// template abstractions do not incur performance overhead. This template
+// provides a reference point for performance when there are no templates
+// dealing with value iteration.
+//
+// TODO: Run benchmarks to determine if OutputAdapter is a zero-cost abstraction
+struct ScalarPrimitiveExec {
+  template <typename Op, typename OutType, typename Arg0Type>
+  static void Unary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    using OUT = typename OutType::c_type;
+    using ARG0 = typename Arg0Type::c_type;
+
+    if (batch[0].kind() == Datum::SCALAR) {
+      ctx->SetStatus(Status::NotImplemented("NYI"));
+    } else {
+      ArrayData* out_arr = out->mutable_array();
+      auto out_data = out_arr->GetMutableValues<OUT>(kPrimitiveData);
+      auto arg0_data = batch[0].array()->GetValues<ARG0>(kPrimitiveData);
+      for (int64_t i = 0; i < batch.length; ++i) {
+        *out_data++ = Op::template Call<OUT, ARG0>(ctx, *arg0_data++);
+      }
+    }
+  }
+
+  template <typename Op, typename OutType, typename Arg0Type, typename Arg1Type>
+  static void Binary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    using OUT = typename OutType::c_type;
+    using ARG0 = typename Arg0Type::c_type;
+    using ARG1 = typename Arg1Type::c_type;
+
+    if (batch[0].kind() == Datum::SCALAR || batch[1].kind() == Datum::SCALAR) {
+      ctx->SetStatus(Status::NotImplemented("NYI"));
+    } else {
+      ArrayData* out_arr = out->mutable_array();
+      auto out_data = out_arr->GetMutableValues<OUT>(kPrimitiveData);
+      auto arg0_data = batch[0].array()->GetValues<ARG0>(kPrimitiveData);
+      auto arg1_data = batch[1].array()->GetValues<ARG1>(kPrimitiveData);
+      for (int64_t i = 0; i < batch.length; ++i) {
+        *out_data++ = Op::template Call<OUT, ARG0, ARG1>(ctx, *arg0_data++, *arg1_data++);
+      }
+    }
+  }
+};
+
+template <typename Type, typename Enable = void>

Review comment:
       Explain: add a comment describing the template declared here

##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -0,0 +1,710 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/scalar.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/string_view.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::BitmapReader;
+using internal::FirstTimeBitmapWriter;
+using internal::GenerateBitsUnrolled;
+
+namespace compute {
+
+#ifdef ARROW_EXTRA_ERROR_CONTEXT
+
+#define KERNEL_RETURN_IF_ERROR(ctx, expr)             \
+  do {                                                \
+    Status _st = (expr);                              \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) {             \
+      _st.AddContextLine(__FILE__, __LINE__, #expr);  \
+      ctx->SetStatus(_st);                            \
+      return;                                         \
+    }                                                 \
+  } while (0)
+
+#else
+
+#define KERNEL_RETURN_IF_ERROR(ctx, expr)       \
+  do {                                          \
+    Status _st = (expr);                        \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) {       \
+      ctx->SetStatus(_st);                      \
+      return;                                   \
+    }                                           \
+  } while (0)
+
+#endif  // ARROW_EXTRA_ERROR_CONTEXT
+
+// A kernel that exposes Call methods that handles iteration over ArrayData
+// inputs itself
+//
+
+constexpr int kValidity = 0;
+constexpr int kBinaryOffsets = 1;
+constexpr int kPrimitiveData = 1;
+constexpr int kBinaryData = 2;
+
+// ----------------------------------------------------------------------
+// Iteration / value access utilities
+
+template <typename T, typename R = void>
+using enable_if_has_c_type_not_boolean = enable_if_t<has_c_type<T>::value &&
+                                                     !is_boolean_type<T>::value, R>;
+
+template <typename Type, typename Enable = void>
+struct ArrayIterator;
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_has_c_type_not_boolean<Type>> {
+  using T = typename Type::c_type;
+  const T* values;
+  ArrayIterator(const ArrayData& data) : values(data.GetValues<T>(1)) {}
+  T operator()() { return *values++; }
+};
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_boolean<Type>> {
+  BitmapReader reader;
+  ArrayIterator(const ArrayData& data)
+      : reader(data.buffers[1]->data(), data.offset, data.length) {}
+  bool operator()() {
+    bool out = reader.IsSet();
+    reader.Next();
+    return out;
+  }
+};
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_base_binary<Type>> {
+  int64_t position = 0;
+  typename TypeTraits<Type>::ArrayType arr;
+  ArrayIterator(const ArrayData& data)
+      : arr(data.Copy()) {}
+  util::string_view operator()() { return arr.GetView(position++); }
+};
+
+template <typename Type, typename Enable = void>
+struct UnboxScalar;
+
+template <typename Type>
+struct UnboxScalar<Type, enable_if_has_c_type<Type>> {
+  using ScalarType = typename TypeTraits<Type>::ScalarType;
+  static typename Type::c_type Unbox(const Datum& datum) {
+    return datum.scalar_as<ScalarType>().value;
+  }
+};
+
+template <typename Type>
+struct UnboxScalar<Type, enable_if_base_binary<Type>> {
+  static util::string_view Unbox(const Datum& datum) {
+    return util::string_view(*datum.scalar_as<BaseBinaryScalar>().value);
+  }
+};
+
+template <typename Type, typename Enable = void>
+struct GetValueType;
+
+template <typename Type>
+struct GetValueType<Type, enable_if_has_c_type<Type>> {
+  using T = typename Type::c_type;
+};
+
+template <typename Type>
+struct GetValueType<
+    Type, enable_if_t<is_base_binary_type<Type>::value || is_decimal_type<Type>::value ||
+                      is_fixed_size_binary_type<Type>::value>> {
+  using T = util::string_view;
+};
+
+// ----------------------------------------------------------------------
+// Reusable type resolvers
+
+Result<ValueDescr> FirstType(KernelContext*, const std::vector<ValueDescr>& descrs);
+
+// ----------------------------------------------------------------------
+// Generate an array kernel given template classes
+
+void ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+
+void BinaryExecFlipped(KernelContext* ctx, ArrayKernelExec exec,
+                       const ExecBatch& batch, Datum* out);
+
+// ----------------------------------------------------------------------
+// Template kernel exec function generators
+
+template <typename T>
+void Extend(const std::vector<T>& values, std::vector<T>* out) {
+  for (const auto& t : values) {
+    out->push_back(t);
+  }
+}
+
+const std::vector<std::shared_ptr<DataType>>& BaseBinaryTypes();
+const std::vector<std::shared_ptr<DataType>>& SignedIntTypes();
+const std::vector<std::shared_ptr<DataType>>& UnsignedIntTypes();
+const std::vector<std::shared_ptr<DataType>>& IntTypes();
+const std::vector<std::shared_ptr<DataType>>& FloatingPointTypes();
+
+// Number types without boolean
+const std::vector<std::shared_ptr<DataType>>& NumericTypes();
+
+// Temporal types including time and timestamps for each unit
+const std::vector<std::shared_ptr<DataType>>& TemporalTypes();
+
+// Integer, floating point, base binary, and temporal
+const std::vector<std::shared_ptr<DataType>>& PrimitiveTypes();
+
+namespace codegen {
+
+// Generate an ArrayKernelExec given a functor that handles all of its own
+// iteration, etc.
+//
+// Operator must implement
+//
+// static void Call(KernelContext*, const ArrayData& in, ArrayData* out)
+template <typename Operator>
+void SimpleUnary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  if (batch[0].kind() == Datum::SCALAR) {
+    ctx->SetStatus(Status::NotImplemented("NYI"));
+  } else if (batch.length > 0) {
+    Operator::Call(ctx, *batch[0].array(), out->mutable_array());
+  }
+}
+
+// Generate an ArrayKernelExec given a functor that handles all of its own
+// iteration, etc.
+//
+// Operator must implement
+//
+// static void Call(KernelContext*, const ArrayData& arg0, const ArrayData& arg1,
+//                  ArrayData* out)
+template <typename Operator>
+void SimpleBinary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  if (batch[0].kind() == Datum::SCALAR || batch[1].kind() == Datum::SCALAR) {
+    ctx->SetStatus(Status::NotImplemented("NYI"));
+  } else if (batch.length > 0) {
+    Operator::Call(ctx, *batch[0].array(), *batch[1].array(), out->mutable_array());
+  }
+}
+
+// A ArrayKernelExec-creation template that iterates over primitive non-boolean
+// inputs and writes into non-boolean primitive outputs.
+//
+// It may be possible to create a more generic template that can deal with any
+// input writing to any output, but we will need to write benchmarks to
+// investigate that on all compiler targets to ensure that the additional
+// template abstractions do not incur performance overhead. This template
+// provides a reference point for performance when there are no templates
+// dealing with value iteration.
+//
+// TODO: Run benchmarks to determine if OutputAdapter is a zero-cost abstraction
+struct ScalarPrimitiveExec {
+  template <typename Op, typename OutType, typename Arg0Type>
+  static void Unary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    using OUT = typename OutType::c_type;
+    using ARG0 = typename Arg0Type::c_type;
+
+    if (batch[0].kind() == Datum::SCALAR) {
+      ctx->SetStatus(Status::NotImplemented("NYI"));
+    } else {
+      ArrayData* out_arr = out->mutable_array();
+      auto out_data = out_arr->GetMutableValues<OUT>(kPrimitiveData);
+      auto arg0_data = batch[0].array()->GetValues<ARG0>(kPrimitiveData);
+      for (int64_t i = 0; i < batch.length; ++i) {
+        *out_data++ = Op::template Call<OUT, ARG0>(ctx, *arg0_data++);
+      }
+    }
+  }
+
+  template <typename Op, typename OutType, typename Arg0Type, typename Arg1Type>
+  static void Binary(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    using OUT = typename OutType::c_type;
+    using ARG0 = typename Arg0Type::c_type;
+    using ARG1 = typename Arg1Type::c_type;
+
+    if (batch[0].kind() == Datum::SCALAR || batch[1].kind() == Datum::SCALAR) {
+      ctx->SetStatus(Status::NotImplemented("NYI"));
+    } else {
+      ArrayData* out_arr = out->mutable_array();
+      auto out_data = out_arr->GetMutableValues<OUT>(kPrimitiveData);
+      auto arg0_data = batch[0].array()->GetValues<ARG0>(kPrimitiveData);
+      auto arg1_data = batch[1].array()->GetValues<ARG1>(kPrimitiveData);
+      for (int64_t i = 0; i < batch.length; ++i) {
+        *out_data++ = Op::template Call<OUT, ARG0, ARG1>(ctx, *arg0_data++, *arg1_data++);
+      }
+    }
+  }
+};
+
+template <typename Type, typename Enable = void>
+struct OutputAdapter;
+
+template <typename Type>
+struct OutputAdapter<Type, enable_if_boolean<Type>> {
+  template <typename Generator>
+  static void Write(KernelContext*, Datum* out, Generator&& generator) {
+    ArrayData* out_arr = out->mutable_array();
+    auto out_bitmap = out_arr->buffers[1]->mutable_data();
+    GenerateBitsUnrolled(out_bitmap, out_arr->offset, out_arr->length,
+                         std::forward<Generator>(generator));
+  }
+};
+
+template <typename Type>
+struct OutputAdapter<Type, enable_if_has_c_type_not_boolean<Type>> {
+  template <typename Generator>
+  static void Write(KernelContext*, Datum* out, Generator&& generator) {
+    ArrayData* out_arr = out->mutable_array();
+    auto out_data = out_arr->GetMutableValues<typename Type::c_type>(kPrimitiveData);
+    // TODO: Is this as fast as a more explicitly inlined function?
+    for (int64_t i = 0 ; i < out_arr->length; ++i) {
+      *out_data++ = generator();
+    }
+  }
+};
+
+template <typename Type>
+struct OutputAdapter<Type, enable_if_base_binary<Type>> {
+  template <typename Generator>
+  static void Write(KernelContext* ctx, Datum* out, Generator&& generator) {
+    ctx->SetStatus(Status::NotImplemented("NYI"));
+  }
+};
+
+// A kernel exec generator for unary functions that addresses both array and
+// scalar inputs and dispatches input iteration and output writing to other
+// templates
+//
+// This template executes the operator even on the data behind null values,
+// therefore it is generally only suitable for operators that cannot fail.
+template <typename OutType, typename Arg0Type, typename Op>
+struct ScalarUnary {
+  using OutScalar = typename TypeTraits<OutType>::ScalarType;
+
+  using OUT = typename GetValueType<OutType>::T;
+  using ARG0 = typename GetValueType<Arg0Type>::T;
+
+  static void Array(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    ArrayIterator<Arg0Type> arg0(*batch[0].array());
+    OutputAdapter<OutType>::Write(ctx, out, [&]() -> OUT {
+        return Op::template Call<OUT, ARG0>(ctx, arg0());
+    });
+  }
+
+  static void Scalar(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    if (batch[0].scalar()->is_valid) {
+      ARG0 arg0 = UnboxScalar<Arg0Type>::Unbox(batch[0]);
+      out->value = std::make_shared<OutScalar>(Op::template Call<OUT, ARG0>(ctx, arg0),
+                                               out->type());
+    } else {
+      out->value = MakeNullScalar(batch[0].type());
+    }
+  }
+
+  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    if (batch[0].kind() == Datum::ARRAY) {
+      return Array(ctx, batch, out);
+    } else {
+      return Scalar(ctx, batch, out);
+    }
+  }
+};
+
+// An alternative to ScalarUnary that Applies a scalar operation with state on
+// only the not-null values of a single array
+template <typename OutType, typename Arg0Type, typename Op>
+struct ScalarUnaryNotNullStateful {
+  using ThisType = ScalarUnaryNotNullStateful<OutType, Arg0Type, Op>;
+  using OutScalar = typename TypeTraits<OutType>::ScalarType;
+  using OUT = typename GetValueType<OutType>::T;
+  using ARG0 = typename GetValueType<Arg0Type>::T;
+
+  Op op;
+  ScalarUnaryNotNullStateful(Op op) : op(std::move(op)) {}
+
+  template <typename Type, typename Enable = void>
+  struct ArrayExec {
+    static void Exec(const ThisType& functor, KernelContext* ctx, const ExecBatch& batch,
+                     Datum* out) {
+      DCHECK(false);
+    }
+  };
+
+  template <typename Type>
+  struct ArrayExec<Type, enable_if_t<has_c_type<Type>::value &&
+                                     !is_boolean_type<Type>::value>> {
+    static void Exec(const ThisType& functor, KernelContext* ctx, const ExecBatch& batch,
+                     Datum* out) {
+      ArrayData* out_arr = out->mutable_array();
+      auto out_data = out_arr->GetMutableValues<OUT>(kPrimitiveData);
+      VisitArrayDataInline<Arg0Type>(*batch[0].array(), [&](util::optional<ARG0> v) {
+          if (v.has_value()) {
+            *out_data = functor.op.template Call<OUT, ARG0>(ctx, *v);
+          }
+          ++out_data;
+        });
+    }
+  };
+
+  template <typename Type>
+  struct ArrayExec<Type, enable_if_t<is_boolean_type<Type>::value>> {
+    static void Exec(const ThisType& functor, KernelContext* ctx, const ExecBatch& batch,
+                     Datum* out) {
+      ArrayData* out_arr = out->mutable_array();
+      FirstTimeBitmapWriter out_writer(out_arr->buffers[1]->mutable_data(),
+                                       out_arr->offset, out_arr->length);
+      VisitArrayDataInline<Arg0Type>(*batch[0].array(), [&](util::optional<ARG0> v) {
+          if (v.has_value()) {
+            if (functor.op.template Call<OUT, ARG0>(ctx, *v)) {
+              out_writer.Set();
+            }
+          }
+          out_writer.Next();
+        });
+      out_writer.Finish();
+    }
+  };
+
+  void Scalar(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    if (batch[0].scalar()->is_valid) {
+      ARG0 arg0 = UnboxScalar<Arg0Type>::Unbox(batch[0]);
+      out->value = std::make_shared<OutScalar>(
+          this->op.template Call<OUT, ARG0>(ctx, arg0),
+          out->type());
+    } else {
+      out->value = MakeNullScalar(batch[0].type());
+    }
+  }
+
+  void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    if (batch[0].kind() == Datum::ARRAY) {
+      ArrayExec<OutType>::Exec(*this, ctx, batch, out);
+    } else {
+      return Scalar(ctx, batch, out);
+    }
+  }
+};
+
+// An alternative to ScalarUnary that Applies a scalar operation on only the
+// not-null values of a single array. The operator is not stateful; if the
+// operator requires some initialization use ScalarUnaryNotNullStateful
+template <typename OutType, typename Arg0Type, typename Op>
+struct ScalarUnaryNotNull {
+  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    // Seed kernel with dummy state
+    ScalarUnaryNotNullStateful<OutType, Arg0Type, Op> kernel({});
+    return kernel.Exec(ctx, batch, out);
+  }
+};
+
+// A kernel exec generator for binary functions that addresses both array and
+// scalar inputs and dispatches input iteration and output writing to other
+// templates
+//
+// This template executes the operator even on the data behind null values,
+// therefore it is generally only suitable for operators that cannot fail.
+template <typename OutType, typename Arg0Type, typename Arg1Type, typename Op,

Review comment:
       Add example functor

##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -0,0 +1,710 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/scalar.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/string_view.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::BitmapReader;
+using internal::FirstTimeBitmapWriter;
+using internal::GenerateBitsUnrolled;
+
+namespace compute {
+
+#ifdef ARROW_EXTRA_ERROR_CONTEXT
+
+#define KERNEL_RETURN_IF_ERROR(ctx, expr)             \
+  do {                                                \
+    Status _st = (expr);                              \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) {             \
+      _st.AddContextLine(__FILE__, __LINE__, #expr);  \
+      ctx->SetStatus(_st);                            \
+      return;                                         \
+    }                                                 \
+  } while (0)
+
+#else
+
+#define KERNEL_RETURN_IF_ERROR(ctx, expr)       \
+  do {                                          \
+    Status _st = (expr);                        \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) {       \
+      ctx->SetStatus(_st);                      \
+      return;                                   \
+    }                                           \
+  } while (0)
+
+#endif  // ARROW_EXTRA_ERROR_CONTEXT
+
+// A kernel that exposes Call methods that handles iteration over ArrayData
+// inputs itself

Review comment:
       note to self: fix this

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
##########
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Implementation of casting to (or between) list types
+
+#include <utility>
+#include <vector>
+
+#include "arrow/compute/cast.h"
+#include "arrow/compute/kernels/common.h"
+#include "arrow/compute/kernels/scalar_cast_internal.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+template <typename Type>
+void CastListExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  const CastOptions& options = checked_cast<const CastState&>(*ctx->state()).options;
+
+  const ArrayData& input = *batch[0].array();
+  ArrayData* result = out->mutable_array();
+
+  if (input.offset != 0) {
+    ctx->SetStatus(Status::NotImplemented(
+        "Casting sliced lists (non-zero offset) not yet implemented"));
+    return;
+  }
+  // Copy buffers from parent
+  result->buffers = input.buffers;
+
+  auto child_type = checked_cast<const Type&>(*result->type).value_type();
+
+  Datum casted_child;
+  KERNEL_RETURN_IF_ERROR(
+      ctx, Cast(Datum(input.child_data[0]), child_type, options, ctx->exec_context())
+               .Value(&casted_child));
+  DCHECK_EQ(Datum::ARRAY, casted_child.kind());
+  result->child_data.push_back(casted_child.array());
+}
+
+OutputType kOutputTargetType(ResolveOutputFromOptions);

Review comment:
       note to self: add static

##########
File path: cpp/src/arrow/compute/kernels/scalar_cast_numeric.cc
##########
@@ -0,0 +1,565 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Implementation of casting to integer or floating point types

Review comment:
       note also includes decimal

##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -0,0 +1,710 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/scalar.h"
+#include "arrow/type_traits.h"
+#include "arrow/util/bit_util.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/optional.h"
+#include "arrow/util/string_view.h"
+#include "arrow/visitor_inline.h"
+
+namespace arrow {
+
+using internal::BitmapReader;
+using internal::FirstTimeBitmapWriter;
+using internal::GenerateBitsUnrolled;
+
+namespace compute {
+
+#ifdef ARROW_EXTRA_ERROR_CONTEXT
+
+#define KERNEL_RETURN_IF_ERROR(ctx, expr)             \
+  do {                                                \
+    Status _st = (expr);                              \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) {             \
+      _st.AddContextLine(__FILE__, __LINE__, #expr);  \
+      ctx->SetStatus(_st);                            \
+      return;                                         \
+    }                                                 \
+  } while (0)
+
+#else
+
+#define KERNEL_RETURN_IF_ERROR(ctx, expr)       \
+  do {                                          \
+    Status _st = (expr);                        \
+    if (ARROW_PREDICT_FALSE(!_st.ok())) {       \
+      ctx->SetStatus(_st);                      \
+      return;                                   \
+    }                                           \
+  } while (0)
+
+#endif  // ARROW_EXTRA_ERROR_CONTEXT
+
+// A kernel that exposes Call methods that handles iteration over ArrayData
+// inputs itself
+//
+
+constexpr int kValidity = 0;
+constexpr int kBinaryOffsets = 1;
+constexpr int kPrimitiveData = 1;
+constexpr int kBinaryData = 2;
+
+// ----------------------------------------------------------------------
+// Iteration / value access utilities
+
+template <typename T, typename R = void>
+using enable_if_has_c_type_not_boolean = enable_if_t<has_c_type<T>::value &&
+                                                     !is_boolean_type<T>::value, R>;
+
+template <typename Type, typename Enable = void>
+struct ArrayIterator;
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_has_c_type_not_boolean<Type>> {
+  using T = typename Type::c_type;
+  const T* values;
+  ArrayIterator(const ArrayData& data) : values(data.GetValues<T>(1)) {}
+  T operator()() { return *values++; }
+};
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_boolean<Type>> {
+  BitmapReader reader;
+  ArrayIterator(const ArrayData& data)
+      : reader(data.buffers[1]->data(), data.offset, data.length) {}
+  bool operator()() {
+    bool out = reader.IsSet();
+    reader.Next();
+    return out;
+  }
+};
+
+template <typename Type>
+struct ArrayIterator<Type, enable_if_base_binary<Type>> {
+  int64_t position = 0;
+  typename TypeTraits<Type>::ArrayType arr;
+  ArrayIterator(const ArrayData& data)
+      : arr(data.Copy()) {}
+  util::string_view operator()() { return arr.GetView(position++); }
+};
+
+template <typename Type, typename Enable = void>
+struct UnboxScalar;
+
+template <typename Type>
+struct UnboxScalar<Type, enable_if_has_c_type<Type>> {
+  using ScalarType = typename TypeTraits<Type>::ScalarType;
+  static typename Type::c_type Unbox(const Datum& datum) {
+    return datum.scalar_as<ScalarType>().value;
+  }
+};
+
+template <typename Type>
+struct UnboxScalar<Type, enable_if_base_binary<Type>> {
+  static util::string_view Unbox(const Datum& datum) {
+    return util::string_view(*datum.scalar_as<BaseBinaryScalar>().value);
+  }
+};
+
+template <typename Type, typename Enable = void>
+struct GetValueType;
+
+template <typename Type>
+struct GetValueType<Type, enable_if_has_c_type<Type>> {
+  using T = typename Type::c_type;
+};
+
+template <typename Type>
+struct GetValueType<
+    Type, enable_if_t<is_base_binary_type<Type>::value || is_decimal_type<Type>::value ||
+                      is_fixed_size_binary_type<Type>::value>> {
+  using T = util::string_view;
+};
+
+// ----------------------------------------------------------------------
+// Reusable type resolvers
+
+Result<ValueDescr> FirstType(KernelContext*, const std::vector<ValueDescr>& descrs);
+
+// ----------------------------------------------------------------------
+// Generate an array kernel given template classes
+
+void ExecFail(KernelContext* ctx, const ExecBatch& batch, Datum* out);
+
+void BinaryExecFlipped(KernelContext* ctx, ArrayKernelExec exec,
+                       const ExecBatch& batch, Datum* out);
+
+// ----------------------------------------------------------------------
+// Template kernel exec function generators
+
+template <typename T>
+void Extend(const std::vector<T>& values, std::vector<T>* out) {
+  for (const auto& t : values) {
+    out->push_back(t);
+  }
+}
+
+const std::vector<std::shared_ptr<DataType>>& BaseBinaryTypes();

Review comment:
       Explain what these are for




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to