[arrow] branch master updated: ARROW-8989: [C++][Doc] Document available compute functions

wesm Mon, 13 Jul 2020 10:49:11 -0700

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/master by this push:
     new 9d2079c  ARROW-8989: [C++][Doc] Document available compute functions
9d2079c is described below

commit 9d2079c2ead31399b724ecc3775d61432a8096af
Author: Antoine Pitrou <[email protected]>
AuthorDate: Mon Jul 13 12:48:30 2020 -0500

    ARROW-8989: [C++][Doc] Document available compute functions
    
    Also fix glaring bugs in arithmetic kernels
    (signed overflow detection was broken).
    
    Closes #7695 from pitrou/ARROW-8989-doc-compute-functions
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Wes McKinney <[email protected]>
---
 c_glib/arrow-glib/compute.cpp                      |   5 +-
 cpp/src/arrow/array/validate.cc                    |   7 +-
 cpp/src/arrow/compute/api.h                        |   4 +
 cpp/src/arrow/compute/api_aggregate.h              |  61 +--
 cpp/src/arrow/compute/api_scalar.h                 |  97 ++--
 cpp/src/arrow/compute/api_vector.h                 |  37 +-
 cpp/src/arrow/compute/cast.cc                      |   2 +-
 cpp/src/arrow/compute/cast.h                       |   5 +
 cpp/src/arrow/compute/exec.h                       |  14 +-
 cpp/src/arrow/compute/function.h                   |   6 +
 cpp/src/arrow/compute/kernels/aggregate_basic.cc   |   2 +-
 cpp/src/arrow/compute/kernels/aggregate_test.cc    |   2 +-
 cpp/src/arrow/compute/kernels/scalar_arithmetic.cc |  28 +-
 .../compute/kernels/scalar_arithmetic_test.cc      |  47 +-
 cpp/src/arrow/compute/registry.h                   |   2 +-
 cpp/src/arrow/scalar.h                             |  40 +-
 cpp/src/arrow/util/int_util.h                      |  33 +-
 cpp/src/parquet/column_reader.cc                   |   7 +-
 docs/source/conf.py                                |   7 +-
 docs/source/cpp/api.rst                            |   2 +
 .../cpp/{getting_started.rst => api/compute.rst}   |  59 ++-
 docs/source/cpp/compute.rst                        | 526 +++++++++++++++++++++
 docs/source/cpp/getting_started.rst                |   1 +
 docs/source/python/api/arrays.rst                  |  71 +--
 docs/source/python/dataset.rst                     |   4 +-
 25 files changed, 883 insertions(+), 186 deletions(-)

diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index d8d0bdc..3e31899 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -676,7 +676,7 @@ garrow_count_options_set_property(GObject *object,
   switch (prop_id) {
   case PROP_MODE:
     priv->options.count_mode =
-      static_cast<arrow::compute::CountOptions::mode>(g_value_get_enum(value));
+      static_cast<arrow::compute::CountOptions::Mode>(g_value_get_enum(value));
     break;
   default:
     G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
@@ -706,7 +706,8 @@ static void
 garrow_count_options_init(GArrowCountOptions *object)
 {
   auto priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object);
-  new(&priv->options) arrow::compute::CountOptions(arrow::compute::CountOptions::COUNT_ALL);
+  new(&priv->options) arrow::compute::CountOptions(
+    arrow::compute::CountOptions::COUNT_NON_NULL);
 }
 
 static void
diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc
index 3dd0ffd..8fb8b59 100644
--- a/cpp/src/arrow/array/validate.cc
+++ b/cpp/src/arrow/array/validate.cc
@@ -98,7 +98,7 @@ struct ValidateArrayVisitor {
     if (value_size < 0) {
       return Status::Invalid("FixedSizeListArray has negative value size ", value_size);
     }
-    if (HasMultiplyOverflow(len, value_size) ||
+    if (HasPositiveMultiplyOverflow(len, value_size) ||
         array.values()->length() != len * value_size) {
       return Status::Invalid("Values Length (", array.values()->length(),
                              ") is not equal to the length (", len,
@@ -329,7 +329,7 @@ Status ValidateArray(const Array& array) {
                            type.ToString(), ", got ", data.buffers.size());
   }
   // This check is required to avoid addition overflow below
-  if (HasAdditionOverflow(array.length(), array.offset())) {
+  if (HasPositiveAdditionOverflow(array.length(), array.offset())) {
     return Status::Invalid("Array of type ", type.ToString(),
                            " has impossibly large length and offset");
   }
@@ -346,7 +346,8 @@ Status ValidateArray(const Array& array) {
         min_buffer_size = BitUtil::BytesForBits(array.length() + array.offset());
         break;
       case DataTypeLayout::FIXED_WIDTH:
-        if (HasMultiplyOverflow(array.length() + array.offset(), spec.byte_width)) {
+        if (HasPositiveMultiplyOverflow(array.length() + array.offset(),
+                                        spec.byte_width)) {
           return Status::Invalid("Array of type ", type.ToString(),
                                  " has impossibly large length and offset");
         }
diff --git a/cpp/src/arrow/compute/api.h b/cpp/src/arrow/compute/api.h
index 3fc6e22..a890cd3 100644
--- a/cpp/src/arrow/compute/api.h
+++ b/cpp/src/arrow/compute/api.h
@@ -20,6 +20,10 @@
 
 #pragma once
 
+/// \defgroup compute-concrete-options Concrete option classes for compute functions
+/// @{
+/// @}
+
 #include "arrow/compute/api_aggregate.h"  // IWYU pragma: export
 #include "arrow/compute/api_scalar.h"     // IWYU pragma: export
 #include "arrow/compute/api_vector.h"     // IWYU pragma: export
diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h
index 82a4ebf..72b3108 100644
--- a/cpp/src/arrow/compute/api_aggregate.h
+++ b/cpp/src/arrow/compute/api_aggregate.h
@@ -37,25 +37,47 @@ class ExecContext;
 // ----------------------------------------------------------------------
 // Aggregate functions
 
-/// \class CountOptions
+/// \addtogroup compute-concrete-options
+/// @{
+
+/// \brief Control Count kernel behavior
 ///
-/// The user control the Count kernel behavior with this class. By default, the
-/// it will count all non-null values.
+/// By default, all non-null values are counted.
 struct ARROW_EXPORT CountOptions : public FunctionOptions {
-  enum mode {
-    // Count all non-null values.
-    COUNT_ALL = 0,
-    // Count all null values.
+  enum Mode {
+    /// Count all non-null values.
+    COUNT_NON_NULL = 0,
+    /// Count all null values.
     COUNT_NULL,
   };
 
-  explicit CountOptions(enum mode count_mode) : count_mode(count_mode) {}
+  explicit CountOptions(enum Mode count_mode) : count_mode(count_mode) {}
+
+  static CountOptions Defaults() { return CountOptions(COUNT_NON_NULL); }
+
+  enum Mode count_mode = COUNT_NON_NULL;
+};
+
+/// \brief Control MinMax kernel behavior
+///
+/// By default, null values are ignored
+struct ARROW_EXPORT MinMaxOptions : public FunctionOptions {
+  enum Mode {
+    /// Skip null values
+    SKIP = 0,
+    /// Any nulls will result in null output
+    OUTPUT_NULL
+  };
+
+  explicit MinMaxOptions(enum Mode null_handling = SKIP) : null_handling(null_handling) {}
 
-  static CountOptions Defaults() { return CountOptions(COUNT_ALL); }
+  static MinMaxOptions Defaults() { return MinMaxOptions{}; }
 
-  enum mode count_mode = COUNT_ALL;
+  enum Mode null_handling = SKIP;
 };
 
+/// @}
+
 /// \brief Count non-null (or null) values in an array.
 ///
 /// \param[in] options counting options, see CountOptions for more information
@@ -91,25 +113,6 @@ Result<Datum> Mean(const Datum& value, ExecContext* ctx = NULLPTR);
 ARROW_EXPORT
 Result<Datum> Sum(const Datum& value, ExecContext* ctx = NULLPTR);
 
-/// \class MinMaxOptions
-///
-/// The user can control the MinMax kernel behavior with this class. By default,
-/// it will skip null if there is a null value present.
-struct ARROW_EXPORT MinMaxOptions : public FunctionOptions {
-  enum mode {
-    /// skip null values
-    SKIP = 0,
-    /// any nulls will result in null output
-    OUTPUT_NULL
-  };
-
-  explicit MinMaxOptions(enum mode null_handling = SKIP) : null_handling(null_handling) {}
-
-  static MinMaxOptions Defaults() { return MinMaxOptions{}; }
-
-  enum mode null_handling = SKIP;
-};
-
 /// \brief Calculate the min / max of a numeric array
 ///
 /// This function returns both the min and max as a struct scalar, with type
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 858e1ff..1d8ef09 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -33,13 +33,64 @@
 namespace arrow {
 namespace compute {
 
-// ----------------------------------------------------------------------
+/// \addtogroup compute-concrete-options
+///
+/// @{
 
 struct ArithmeticOptions : public FunctionOptions {
   ArithmeticOptions() : check_overflow(false) {}
   bool check_overflow;
 };
 
+struct ARROW_EXPORT BinaryContainsExactOptions : public FunctionOptions {
+  explicit BinaryContainsExactOptions(std::string pattern)
+      : pattern(std::move(pattern)) {}
+
+  /// The exact pattern to look for inside input values.
+  std::string pattern;
+};
+
+/// Options for IsIn and Match functions
+struct ARROW_EXPORT SetLookupOptions : public FunctionOptions {
+  explicit SetLookupOptions(Datum value_set, bool skip_nulls)
+      : value_set(std::move(value_set)), skip_nulls(skip_nulls) {}
+
+  /// The set of values to look up input values into.
+  Datum value_set;
+  /// Whether nulls in `value_set` count for lookup.
+  ///
+  /// If true, any null in `value_set` is ignored and nulls in the input
+  /// produce null (Match) or false (IsIn) values in the output.
+  /// If false, any null in `value_set` is successfully matched in
+  /// the input.
+  bool skip_nulls;
+};
+
+struct ARROW_EXPORT StrptimeOptions : public FunctionOptions {
+  explicit StrptimeOptions(std::string format, TimeUnit::type unit)
+      : format(format), unit(unit) {}
+
+  std::string format;
+  TimeUnit::type unit;
+};
+
+enum CompareOperator : int8_t {
+  EQUAL,
+  NOT_EQUAL,
+  GREATER,
+  GREATER_EQUAL,
+  LESS,
+  LESS_EQUAL,
+};
+
+struct CompareOptions : public FunctionOptions {
+  explicit CompareOptions(CompareOperator op) : op(op) {}
+
+  enum CompareOperator op;
+};
+
+/// @}
+
 /// \brief Add two values together. Array values must be the same length. If
 /// either addend is null the result will be null.
 ///
@@ -79,21 +130,6 @@ Result<Datum> Multiply(const Datum& left, const Datum& right,
                        ArithmeticOptions options = ArithmeticOptions(),
                        ExecContext* ctx = NULLPTR);
 
-enum CompareOperator {
-  EQUAL,
-  NOT_EQUAL,
-  GREATER,
-  GREATER_EQUAL,
-  LESS,
-  LESS_EQUAL,
-};
-
-struct CompareOptions : public FunctionOptions {
-  explicit CompareOptions(CompareOperator op) : op(op) {}
-
-  enum CompareOperator op;
-};
-
 /// \brief Compare a numeric array with a scalar.
 ///
 /// \param[in] left datum to compare, must be an Array
@@ -185,15 +221,6 @@ Result<Datum> KleeneOr(const Datum& left, const Datum& right, ExecContext* ctx =
 ARROW_EXPORT
 Result<Datum> Xor(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR);
 
-/// For set lookup operations like IsIn, Match
-struct ARROW_EXPORT SetLookupOptions : public FunctionOptions {
-  explicit SetLookupOptions(Datum value_set, bool skip_nulls)
-      : value_set(std::move(value_set)), skip_nulls(skip_nulls) {}
-
-  Datum value_set;
-  bool skip_nulls;
-};
-
 /// \brief IsIn returns true for each element of `values` that is contained in
 /// `value_set`
 ///
@@ -274,25 +301,5 @@ ARROW_EXPORT
 Result<Datum> FillNull(const Datum& values, const Datum& fill_value,
                        ExecContext* ctx = NULLPTR);
 
-// ----------------------------------------------------------------------
-// String functions
-
-struct ARROW_EXPORT BinaryContainsExactOptions : public FunctionOptions {
-  explicit BinaryContainsExactOptions(std::string pattern) : pattern(pattern) {}
-
-  std::string pattern;
-};
-
-// ----------------------------------------------------------------------
-// Temporal functions
-
-struct ARROW_EXPORT StrptimeOptions : public FunctionOptions {
-  explicit StrptimeOptions(std::string format, TimeUnit::type unit)
-      : format(format), unit(unit) {}
-
-  std::string format;
-  TimeUnit::type unit;
-};
-
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h
index 28812c3..c3e9dc9 100644
--- a/cpp/src/arrow/compute/api_vector.h
+++ b/cpp/src/arrow/compute/api_vector.h
@@ -29,6 +29,9 @@ namespace compute {
 
 class ExecContext;
 
+/// \addtogroup compute-concrete-options
+/// @{
+
 struct FilterOptions : public FunctionOptions {
   /// Configure the action taken when a slot of the selection mask is null
   enum NullSelectionBehavior {
@@ -46,6 +49,25 @@ struct FilterOptions : public FunctionOptions {
   NullSelectionBehavior null_selection_behavior = DROP;
 };
 
+struct ARROW_EXPORT TakeOptions : public FunctionOptions {
+  explicit TakeOptions(bool boundscheck = true) : boundscheck(boundscheck) {}
+
+  bool boundscheck = true;
+  static TakeOptions BoundsCheck() { return TakeOptions(true); }
+  static TakeOptions NoBoundsCheck() { return TakeOptions(false); }
+  static TakeOptions Defaults() { return BoundsCheck(); }
+};
+
+/// \brief Partitioning options for NthToIndices
+struct PartitionOptions : public FunctionOptions {
+  explicit PartitionOptions(int64_t pivot) : pivot(pivot) {}
+
+  /// The index into the equivalent sorted array of the partition pivot element.
+  int64_t pivot;
+};
+
+/// @}
+
 /// \brief Filter with a boolean selection filter
 ///
 /// The output will be populated with values from the input at positions
@@ -85,15 +107,6 @@ Result<std::shared_ptr<ArrayData>> GetTakeIndices(
 
 }  // namespace internal
 
-struct ARROW_EXPORT TakeOptions : public FunctionOptions {
-  explicit TakeOptions(bool boundscheck = true) : boundscheck(boundscheck) {}
-
-  bool boundscheck = true;
-  static TakeOptions BoundsCheck() { return TakeOptions(true); }
-  static TakeOptions NoBoundsCheck() { return TakeOptions(false); }
-  static TakeOptions Defaults() { return BoundsCheck(); }
-};
-
 /// \brief Take from an array of values at indices in another array
 ///
 /// The output array will be of the same type as the input values
@@ -121,11 +134,6 @@ Result<std::shared_ptr<Array>> Take(const Array& values, const Array& indices,
                                     const TakeOptions& options = TakeOptions::Defaults(),
                                     ExecContext* ctx = NULLPTR);
 
-struct PartitionOptions : public FunctionOptions {
-  explicit PartitionOptions(int64_t pivot) : pivot(pivot) {}
-  int64_t pivot;
-};
-
 /// \brief Returns indices that partition an array around n-th
 /// sorted element.
 ///
@@ -178,6 +186,7 @@ ARROW_EXPORT extern const char kValuesFieldName[];
 ARROW_EXPORT extern const char kCountsFieldName[];
 ARROW_EXPORT extern const int32_t kValuesFieldIndex;
 ARROW_EXPORT extern const int32_t kCountsFieldIndex;
+
 /// \brief Return counts of unique elements from an array-like object.
 ///
 /// Note that the counts do not include counts for nulls in the array.  These can be
diff --git a/cpp/src/arrow/compute/cast.cc b/cpp/src/arrow/compute/cast.cc
index 9c8ea66..211e5a2 100644
--- a/cpp/src/arrow/compute/cast.cc
+++ b/cpp/src/arrow/compute/cast.cc
@@ -136,7 +136,7 @@ Result<const ScalarKernel*> CastFunction::DispatchExact(
 
   // Validate arity
   if (passed_num_args != 1) {
-    return Status::Invalid("Cast sunctions accept 1 argument but passed ",
+    return Status::Invalid("Cast functions accept 1 argument but passed ",
                            passed_num_args);
   }
   std::vector<const ScalarKernel*> candidate_kernels;
diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h
index 907eef3..82dd357 100644
--- a/cpp/src/arrow/compute/cast.h
+++ b/cpp/src/arrow/compute/cast.h
@@ -38,6 +38,9 @@ namespace compute {
 
 class ExecContext;
 
+/// \addtogroup compute-concrete-options
+/// @{
+
 struct ARROW_EXPORT CastOptions : public FunctionOptions {
   CastOptions()
       : allow_int_overflow(false),
@@ -73,6 +76,8 @@ struct ARROW_EXPORT CastOptions : public FunctionOptions {
   bool allow_invalid_utf8;
 };
 
+/// @}
+
 // Cast functions are _not_ registered in the FunctionRegistry, though they use
 // the same execution machinery
 class CastFunction : public ScalarFunction {
diff --git a/cpp/src/arrow/compute/exec.h b/cpp/src/arrow/compute/exec.h
index aae37c7..142e149 100644
--- a/cpp/src/arrow/compute/exec.h
+++ b/cpp/src/arrow/compute/exec.h
@@ -211,18 +211,26 @@ struct ExecBatch {
   }
 };
 
-/// \brief One-shot invoker for all types of functions. Does kernel dispatch,
-/// argument checking, iteration of ChunkedArray inputs, and wrapping of
-/// outputs
+/// \defgroup compute-call-function One-shot calls to compute functions
+///
+/// @{
+
+/// \brief One-shot invoker for all types of functions.
+///
+/// Does kernel dispatch, argument checking, iteration of ChunkedArray inputs,
+/// and wrapping of outputs.
 ARROW_EXPORT
 Result<Datum> CallFunction(const std::string& func_name, const std::vector<Datum>& args,
                            const FunctionOptions* options, ExecContext* ctx = NULLPTR);
 
 /// \brief Variant of CallFunction which uses a function's default options.
+///
 /// NB: Some functions require FunctionOptions be provided.
 ARROW_EXPORT
 Result<Datum> CallFunction(const std::string& func_name, const std::vector<Datum>& args,
                            ExecContext* ctx = NULLPTR);
 
+/// @}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/function.h b/cpp/src/arrow/compute/function.h
index 67af4df..93a200e 100644
--- a/cpp/src/arrow/compute/function.h
+++ b/cpp/src/arrow/compute/function.h
@@ -35,6 +35,10 @@
 namespace arrow {
 namespace compute {
 
+/// \defgroup compute-functions Abstract compute function API
+///
+/// @{
+
 /// \brief Base class for specifying options configuring a function's behavior,
 /// such as error handling.
 struct ARROW_EXPORT FunctionOptions {};
@@ -277,5 +281,7 @@ class ARROW_EXPORT MetaFunction : public Function {
       : Function(std::move(name), Function::META, arity, default_options) {}
 };
 
+/// @}
+
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 53e89ce..8765914 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -61,7 +61,7 @@ struct CountImpl : public ScalarAggregator {
   void Finalize(KernelContext* ctx, Datum* out) override {
     const auto& state = checked_cast<const CountImpl&>(*ctx->state());
     switch (state.options.count_mode) {
-      case CountOptions::COUNT_ALL:
+      case CountOptions::COUNT_NON_NULL:
         *out = Datum(state.non_nulls);
         break;
       case CountOptions::COUNT_NULL:
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index 3b2d4e0..db548f2 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -277,7 +277,7 @@ static CountPair NaiveCount(const Array& array) {
 }
 
 void ValidateCount(const Array& input, CountPair expected) {
-  CountOptions all = CountOptions(CountOptions::COUNT_ALL);
+  CountOptions all = CountOptions(CountOptions::COUNT_NON_NULL);
   CountOptions nulls = CountOptions(CountOptions::COUNT_NULL);
 
   ASSERT_OK_AND_ASSIGN(Datum result, Count(input, all));
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 82a8f15..1f0cd37 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -17,6 +17,7 @@
 
 #include "arrow/compute/kernels/common.h"
 #include "arrow/util/int_util.h"
+#include "arrow/util/macros.h"
 
 #ifndef __has_builtin
 #define __has_builtin(x) 0
@@ -66,7 +67,7 @@ struct Add {
 
   template <typename T>
   static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right) {
-    return to_unsigned(left) + to_unsigned(right);
+    return arrow::internal::SafeSignedAdd(left, right);
   }
 };
 
@@ -75,7 +76,7 @@ struct AddChecked {
   template <typename T>
   static enable_if_integer<T> Call(KernelContext* ctx, T left, T right) {
     T result;
-    if (__builtin_add_overflow(left, right, &result)) {
+    if (ARROW_PREDICT_FALSE(__builtin_add_overflow(left, right, &result))) {
       ctx->SetStatus(Status::Invalid("overflow"));
     }
     return result;
@@ -83,7 +84,7 @@ struct AddChecked {
 #else
   template <typename T>
   static enable_if_unsigned_integer<T> Call(KernelContext* ctx, T left, T right) {
-    if (arrow::internal::HasAdditionOverflow(left, right)) {
+    if (ARROW_PREDICT_FALSE(arrow::internal::HasPositiveAdditionOverflow(left, right))) {
       ctx->SetStatus(Status::Invalid("overflow"));
     }
     return left + right;
@@ -91,12 +92,10 @@ struct AddChecked {
 
   template <typename T>
   static enable_if_signed_integer<T> Call(KernelContext* ctx, T left, T right) {
-    auto unsigned_left = to_unsigned(left);
-    auto unsigned_right = to_unsigned(right);
-    if (arrow::internal::HasAdditionOverflow(unsigned_left, unsigned_right)) {
+    if (ARROW_PREDICT_FALSE(arrow::internal::HasSignedAdditionOverflow(left, right))) {
       ctx->SetStatus(Status::Invalid("overflow"));
     }
-    return unsigned_left + unsigned_right;
+    return left + right;
   }
 #endif
 
@@ -119,7 +118,7 @@ struct Subtract {
 
   template <typename T>
   static constexpr enable_if_signed_integer<T> Call(KernelContext*, T left, T right) {
-    return to_unsigned(left) - to_unsigned(right);
+    return arrow::internal::SafeSignedSubtract(left, right);
   }
 };
 
@@ -128,7 +127,7 @@ struct SubtractChecked {
   template <typename T>
   static enable_if_integer<T> Call(KernelContext* ctx, T left, T right) {
     T result;
-    if (__builtin_sub_overflow(left, right, &result)) {
+    if (ARROW_PREDICT_FALSE(__builtin_sub_overflow(left, right, &result))) {
       ctx->SetStatus(Status::Invalid("overflow"));
     }
     return result;
@@ -136,7 +135,8 @@ struct SubtractChecked {
 #else
   template <typename T>
   static enable_if_unsigned_integer<T> Call(KernelContext* ctx, T left, T right) {
-    if (arrow::internal::HasSubtractionOverflow(left, right)) {
+    if (ARROW_PREDICT_FALSE(
+            arrow::internal::HasPositiveSubtractionOverflow(left, right))) {
       ctx->SetStatus(Status::Invalid("overflow"));
     }
     return left - right;
@@ -144,10 +144,10 @@ struct SubtractChecked {
 
   template <typename T>
   static enable_if_signed_integer<T> Call(KernelContext* ctx, T left, T right) {
-    if (arrow::internal::HasSubtractionOverflow(left, right)) {
+    if (ARROW_PREDICT_FALSE(arrow::internal::HasSignedSubtractionOverflow(left, right))) {
       ctx->SetStatus(Status::Invalid("overflow"));
     }
-    return to_unsigned(left) - to_unsigned(right);
+    return left - right;
   }
 #endif
 
@@ -201,12 +201,12 @@ struct MultiplyChecked {
   static enable_if_integer<T> Call(KernelContext* ctx, T left, T right) {
     T result;
 #if __has_builtin(__builtin_mul_overflow)
-    if (__builtin_mul_overflow(left, right, &result)) {
+    if (ARROW_PREDICT_FALSE(__builtin_mul_overflow(left, right, &result))) {
       ctx->SetStatus(Status::Invalid("overflow"));
     }
 #else
     result = Multiply::Call(ctx, left, right);
-    if (left != 0 && result / left != right) {
+    if (left != 0 && ARROW_PREDICT_FALSE(result / left != right)) {
       ctx->SetStatus(Status::Invalid("overflow"));
     }
 #endif
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index e0f4890..ceb4623 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -235,9 +235,6 @@ TYPED_TEST(TestBinaryArithmeticSigned, OverflowWraps) {
   auto min = std::numeric_limits<CType>::lowest();
   auto max = std::numeric_limits<CType>::max();
 
-  this->AssertBinop(Add, MakeArray(min, max, max), MakeArray(CType(-1), 1, max),
-                    MakeArray(max, min, CType(-2)));
-
   this->AssertBinop(Subtract, MakeArray(min, max, min), MakeArray(1, max, max),
                     MakeArray(max, 0, 1));
   this->AssertBinop(Multiply, MakeArray(min, max, max), MakeArray(max, 2, max),
@@ -261,7 +258,41 @@ TYPED_TEST(TestBinaryArithmeticIntegral, OverflowRaises) {
                           "overflow");
 }
 
-TYPED_TEST(TestBinaryArithmeticSigned, OverflowRaises) {
+TYPED_TEST(TestBinaryArithmeticSigned, AddOverflowRaises) {
+  using CType = typename TestFixture::CType;
+
+  auto min = std::numeric_limits<CType>::lowest();
+  auto max = std::numeric_limits<CType>::max();
+
+  this->SetOverflowCheck(true);
+
+  this->AssertBinop(Add, MakeArray(max), MakeArray(-1), MakeArray(max - 1));
+  this->AssertBinop(Add, MakeArray(min), MakeArray(1), MakeArray(min + 1));
+  this->AssertBinop(Add, MakeArray(-1), MakeArray(2), MakeArray(1));
+  this->AssertBinop(Add, MakeArray(1), MakeArray(-2), MakeArray(-1));
+
+  this->AssertBinopRaises(Add, MakeArray(max), MakeArray(1), "overflow");
+  this->AssertBinopRaises(Add, MakeArray(min), MakeArray(-1), "overflow");
+}
+
+TYPED_TEST(TestBinaryArithmeticSigned, SubOverflowRaises) {
+  using CType = typename TestFixture::CType;
+
+  auto min = std::numeric_limits<CType>::lowest();
+  auto max = std::numeric_limits<CType>::max();
+
+  this->SetOverflowCheck(true);
+
+  this->AssertBinop(Subtract, MakeArray(max), MakeArray(1), MakeArray(max - 1));
+  this->AssertBinop(Subtract, MakeArray(min), MakeArray(-1), MakeArray(min + 1));
+  this->AssertBinop(Subtract, MakeArray(-1), MakeArray(-2), MakeArray(1));
+  this->AssertBinop(Subtract, MakeArray(1), MakeArray(2), MakeArray(-1));
+
+  this->AssertBinopRaises(Subtract, MakeArray(max), MakeArray(-1), "overflow");
+  this->AssertBinopRaises(Subtract, MakeArray(min), MakeArray(1), "overflow");
+}
+
+TYPED_TEST(TestBinaryArithmeticSigned, MulOverflowRaises) {
   using CType = typename TestFixture::CType;
 
   auto min = std::numeric_limits<CType>::lowest();
@@ -270,8 +301,16 @@ TYPED_TEST(TestBinaryArithmeticSigned, OverflowRaises) {
   this->SetOverflowCheck(true);
 
   this->AssertBinop(Multiply, MakeArray(max), MakeArray(-1), MakeArray(min + 1));
+  this->AssertBinop(Multiply, MakeArray(max / 2), MakeArray(-2), MakeArray(min + 2));
+
   this->AssertBinopRaises(Multiply, MakeArray(max), MakeArray(2), "overflow");
+  this->AssertBinopRaises(Multiply, MakeArray(max / 2), MakeArray(3), "overflow");
+  this->AssertBinopRaises(Multiply, MakeArray(max / 2), MakeArray(-3), "overflow");
+
+  this->AssertBinopRaises(Multiply, MakeArray(min), MakeArray(2), "overflow");
+  this->AssertBinopRaises(Multiply, MakeArray(min / 2), MakeArray(3), "overflow");
   this->AssertBinopRaises(Multiply, MakeArray(min), MakeArray(-1), "overflow");
+  this->AssertBinopRaises(Multiply, MakeArray(min / 2), MakeArray(-2), "overflow");
 }
 
 TYPED_TEST(TestBinaryArithmeticUnsigned, OverflowWraps) {
diff --git a/cpp/src/arrow/compute/registry.h b/cpp/src/arrow/compute/registry.h
index bb3ded4..2d4c40b 100644
--- a/cpp/src/arrow/compute/registry.h
+++ b/cpp/src/arrow/compute/registry.h
@@ -72,7 +72,7 @@ class ARROW_EXPORT FunctionRegistry {
   std::unique_ptr<FunctionRegistryImpl> impl_;
 };
 
-// \brief Return the process-global function registry
+/// \brief Return the process-global function registry
 ARROW_EXPORT FunctionRegistry* GetFunctionRegistry();
 
 }  // namespace compute
diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h
index 81516eb..1a079bb 100644
--- a/cpp/src/arrow/scalar.h
+++ b/cpp/src/arrow/scalar.h
@@ -42,8 +42,12 @@ namespace arrow {
 
 class Array;
 
-/// \brief Base class for scalar values, representing a single value occupying
-/// an array "slot"
+/// \brief Base class for scalar values
+///
+/// A Scalar represents a single value with a specific DataType.
+/// Scalars are useful for passing single value inputs to compute functions,
+/// or for representing individual array elements (with a non-trivial
+/// wrapping cost, though).
 struct ARROW_EXPORT Scalar : public util::EqualityComparable<Scalar> {
   virtual ~Scalar() = default;
 
@@ -82,6 +86,10 @@ struct ARROW_EXPORT Scalar : public util::EqualityComparable<Scalar> {
       : type(std::move(type)), is_valid(is_valid) {}
 };
 
+/// \defgroup concrete-scalar-classes Concrete Scalar subclasses
+///
+/// @{
+
 /// \brief A scalar value for NullType. Never valid
 struct ARROW_EXPORT NullScalar : public Scalar {
  public:
@@ -90,6 +98,8 @@ struct ARROW_EXPORT NullScalar : public Scalar {
   NullScalar() : Scalar{null(), false} {}
 };
 
+/// @}
+
 namespace internal {
 
 struct ARROW_EXPORT PrimitiveScalarBase : public Scalar {
@@ -119,6 +129,10 @@ struct ARROW_EXPORT PrimitiveScalar : public PrimitiveScalarBase {
 
 }  // namespace internal
 
+/// \addtogroup concrete-scalar-classes Concrete Scalar subclasses
+///
+/// @{
+
 struct ARROW_EXPORT BooleanScalar : public internal::PrimitiveScalar<BooleanType, bool> {
   using Base = internal::PrimitiveScalar<BooleanType, bool>;
   using Base::Base;
@@ -423,9 +437,18 @@ struct ARROW_EXPORT ExtensionScalar : public Scalar {
   using TypeClass = ExtensionType;
 };
 
+/// @}
+
+/// \defgroup scalar-factories Scalar factory functions
+///
+/// @{
+
+/// \brief Scalar factory for null scalars
 ARROW_EXPORT
 std::shared_ptr<Scalar> MakeNullScalar(std::shared_ptr<DataType> type);
 
+/// @}
+
 namespace internal {
 
 inline Status CheckBufferLength(...) { return Status::OK(); }
@@ -465,13 +488,22 @@ struct MakeScalarImpl {
   std::shared_ptr<Scalar> out_;
 };
 
+/// \addtogroup scalar-factories
+///
+/// @{
+
+/// \brief Scalar factory for non-null scalars
 template <typename Value>
 Result<std::shared_ptr<Scalar>> MakeScalar(std::shared_ptr<DataType> type,
                                            Value&& value) {
   return MakeScalarImpl<Value&&>{type, std::forward<Value>(value), NULLPTR}.Finish();
 }
 
-/// \brief type inferring scalar factory
+/// \brief Type-inferring scalar factory for non-null scalars
+///
+/// Construct a Scalar instance with a DataType determined by the input C++ type.
+/// (for example Int8Scalar for a int8_t input).
+/// Only non-parametric primitive types and String are supported.
 template <typename Value, typename Traits = CTypeTraits<typename std::decay<Value>::type>,
           typename ScalarType = typename Traits::ScalarType,
           typename Enable = decltype(ScalarType(std::declval<Value>(),
@@ -484,4 +516,6 @@ inline std::shared_ptr<Scalar> MakeScalar(std::string value) {
   return std::make_shared<StringScalar>(std::move(value));
 }
 
+/// @}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/util/int_util.h b/cpp/src/arrow/util/int_util.h
index c4ed0eb..59e4f9c 100644
--- a/cpp/src/arrow/util/int_util.h
+++ b/cpp/src/arrow/util/int_util.h
@@ -83,6 +83,14 @@ SignedInt SafeSignedAdd(SignedInt u, SignedInt v) {
                                 static_cast<UnsignedInt>(v));
 }
 
+/// Signed subtraction with well-defined behaviour on overflow (as unsigned)
+template <typename SignedInt>
+SignedInt SafeSignedSubtract(SignedInt u, SignedInt v) {
+  using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
+  return static_cast<SignedInt>(static_cast<UnsignedInt>(u) -
+                                static_cast<UnsignedInt>(v));
+}
+
 /// Signed left shift with well-defined behaviour on negative numbers or overflow
 template <typename SignedInt, typename Shift>
 SignedInt SafeLeftShift(SignedInt u, Shift shift) {
@@ -90,25 +98,42 @@ SignedInt SafeLeftShift(SignedInt u, Shift shift) {
   return static_cast<SignedInt>(static_cast<UnsignedInt>(u) << shift);
 }
 
+// TODO Add portable wrappers for __builtin_add_overflow and friends
+// see http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2428.pdf
+
 /// Detect multiplication overflow between *positive* integers
 template <typename Integer>
-bool HasMultiplyOverflow(Integer value, Integer multiplicand) {
+bool HasPositiveMultiplyOverflow(Integer value, Integer multiplicand) {
   return (multiplicand != 0 &&
           value > std::numeric_limits<Integer>::max() / multiplicand);
 }
 
 /// Detect addition overflow between *positive* integers
 template <typename Integer>
-bool HasAdditionOverflow(Integer value, Integer addend) {
+bool HasPositiveAdditionOverflow(Integer value, Integer addend) {
   return (value > std::numeric_limits<Integer>::max() - addend);
 }
 
-/// Detect addition overflow between integers
+/// Detect addition overflow between signed integers
 template <typename Integer>
-bool HasSubtractionOverflow(Integer value, Integer minuend) {
+bool HasSignedAdditionOverflow(Integer value, Integer addend) {
+  return (addend > 0) ? (value > std::numeric_limits<Integer>::max() - addend)
+                      : (value < std::numeric_limits<Integer>::min() - addend);
+}
+
+/// Detect subtraction overflow between *positive* integers
+template <typename Integer>
+bool HasPositiveSubtractionOverflow(Integer value, Integer minuend) {
   return (value < minuend);
 }
 
+/// Detect subtraction overflow between signed integers
+template <typename Integer>
+bool HasSignedSubtractionOverflow(Integer value, Integer subtrahend) {
+  return (subtrahend > 0) ? (value < std::numeric_limits<Integer>::min() + 
subtrahend)
+                          : (value > std::numeric_limits<Integer>::max() + 
subtrahend);
+}
+
 /// Upcast an integer to the largest possible width (currently 64 bits)
 
 template <typename Integer>
diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index 27a3a92..0bfc303 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -1028,7 +1028,7 @@ class TypedRecordReader : public 
ColumnReaderImplBase<DType>,
   // Compute the values capacity in bytes for the given number of elements
   int64_t bytes_for_values(int64_t nitems) const {
     int64_t type_size = GetTypeByteSize(this->descr_->physical_type());
-    if (::arrow::internal::HasMultiplyOverflow(nitems, type_size)) {
+    if (::arrow::internal::HasPositiveMultiplyOverflow(nitems, type_size)) {
       throw ParquetException("Total size of items too large");
     }
     return nitems * type_size;
@@ -1184,7 +1184,7 @@ class TypedRecordReader : public 
ColumnReaderImplBase<DType>,
     if (extra_size < 0) {
       throw ParquetException("Negative size (corrupt file?)");
     }
-    if (::arrow::internal::HasAdditionOverflow(size, extra_size)) {
+    if (::arrow::internal::HasPositiveAdditionOverflow(size, extra_size)) {
       throw ParquetException("Allocation size too large (corrupt file?)");
     }
     const int64_t target_size = size + extra_size;
@@ -1203,7 +1203,8 @@ class TypedRecordReader : public 
ColumnReaderImplBase<DType>,
           UpdateCapacity(levels_capacity_, levels_written_, extra_levels);
       if (new_levels_capacity > levels_capacity_) {
         constexpr auto kItemSize = static_cast<int64_t>(sizeof(int16_t));
-        if (::arrow::internal::HasMultiplyOverflow(new_levels_capacity, 
kItemSize)) {
+        if (::arrow::internal::HasPositiveMultiplyOverflow(new_levels_capacity,
+                                                           kItemSize)) {
           throw ParquetException("Allocation size too large (corrupt file?)");
         }
         PARQUET_THROW_NOT_OK(def_levels_->Resize(new_levels_capacity * 
kItemSize, false));
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 2013a72..4508faa 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -74,6 +74,10 @@ autodoc_default_options = {
     'inherited-members': None
 }
 
+# Breathe configuration
+breathe_projects = {"arrow_cpp": "../../cpp/apidoc/xml"}
+breathe_default_project = "arrow_cpp"
+
 # Overriden conditionally below
 autodoc_mock_imports = []
 
@@ -86,9 +90,6 @@ napoleon_use_rtype = False
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 
-breathe_projects = {"arrow_cpp": "../../cpp/apidoc/xml"}
-breathe_default_project = "arrow_cpp"
-
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
diff --git a/docs/source/cpp/api.rst b/docs/source/cpp/api.rst
index 9b7d356..59d2210 100644
--- a/docs/source/cpp/api.rst
+++ b/docs/source/cpp/api.rst
@@ -26,8 +26,10 @@ API Reference
    api/memory
    api/datatype
    api/array
+   api/scalar
    api/builder
    api/table
+   api/compute
    api/tensor
    api/utilities
    api/io
diff --git a/docs/source/cpp/getting_started.rst 
b/docs/source/cpp/api/compute.rst
similarity index 53%
copy from docs/source/cpp/getting_started.rst
copy to docs/source/cpp/api/compute.rst
index 5ec0dec..3b0a89f 100644
--- a/docs/source/cpp/getting_started.rst
+++ b/docs/source/cpp/api/compute.rst
@@ -15,23 +15,42 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-.. default-domain:: cpp
-.. highlight:: cpp
-
-User Guide
-==========
-
-.. toctree::
-
-   overview
-   conventions
-   cmake
-   memory
-   arrays
-   datatypes
-   tables
-   io
-   parquet
-   csv
-   json
-   flight
+Compute Functions
+=================
+
+Datum class
+-----------
+
+.. doxygenclass:: arrow::Datum
+   :members:
+
+Abstract Function classes
+-------------------------
+
+.. doxygengroup:: compute-functions
+   :content-only:
+   :members:
+
+Function registry
+-----------------
+
+.. doxygenclass:: arrow::compute::FunctionRegistry
+   :members:
+
+.. doxygenfunction:: arrow::compute::GetFunctionRegistry
+
+Convenience functions
+---------------------
+
+.. doxygengroup:: compute-call-function
+   :content-only:
+
+Concrete options classes
+------------------------
+
+.. doxygengroup:: compute-concrete-options
+   :content-only:
+   :members:
+   :undoc-members:
+
+.. TODO: List concrete function invocation shortcuts?
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
new file mode 100644
index 0000000..149dbb3
--- /dev/null
+++ b/docs/source/cpp/compute.rst
@@ -0,0 +1,526 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements.  See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership.  The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied.  See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+.. cpp:namespace:: arrow::compute
+
+=================
+Compute Functions
+=================
+
+The generic Compute API
+=======================
+
+.. TODO: describe API and how to invoke compute functions
+
+Functions and function registry
+-------------------------------
+
+Functions represent logical compute operations over inputs of possibly
+varying types.  Internally, a function is implemented by one or several
+"kernels", depending on the concrete input types (for example, a function
+adding values from two inputs can have different kernels depending on
+whether the inputs are integral or floating-point).
+
+Functions are stored in a global :class:`FunctionRegistry` where
+they can be looked up by name.
+
+Input shapes
+------------
+
+Computation inputs are represented as a general :class:`Datum` class,
+which is a tagged union of several shapes of data such as :class:`Scalar`,
+:class:`Array` and :class:`ChunkedArray`.  Many compute functions support
+both array (chunked or not) and scalar inputs, however some will mandate
+either.  For example, the ``fill_null`` function requires its second input
+to be a scalar, while ``sort_indices`` requires its first and only input to
+be an array.
+
+Invoking functions
+------------------
+
+Compute functions can be invoked by name using
+:func:`arrow::compute::CallFunction`::
+
+   std::shared_ptr<arrow::Array> numbers_array = ...;
+   std::shared_ptr<arrow::Scalar> increment = ...;
+   arrow::Datum incremented_datum;
+
+   ARROW_ASSIGN_OR_RAISE(incremented_datum,
+                         arrow::compute::CallFunction("add", {numbers_array, 
increment}));
+   std::shared_ptr<Array> incremented_array = 
std::move(incremented_datum).make_array();
+
+(note this example uses implicit conversion from ``std::shared_ptr<Array>``
+to ``Datum``)
+
+Many compute functions are also available directly as concrete APIs, here
+:func:`arrow::compute::Add`::
+
+   std::shared_ptr<arrow::Array> numbers_array = ...;
+   std::shared_ptr<arrow::Scalar> increment = ...;
+   arrow::Datum incremented_datum;
+
+   ARROW_ASSIGN_OR_RAISE(incremented_datum,
+                         arrow::compute::Add(numbers_array, increment));
+   std::shared_ptr<Array> incremented_array = 
std::move(incremented_datum).make_array();
+
+Some functions accept or require an options structure that determines the
+exact semantics of the function::
+
+   MinMaxOptions options;
+   options.null_handling = MinMaxOptions::OUTPUT_NULL;
+
+   std::shared_ptr<arrow::Array> array = ...;
+   arrow::Datum minmax_datum;
+
+   ARROW_ASSIGN_OR_RAISE(minmax_datum,
+                         arrow::compute::CallFunction("minmax", {array}, 
&options));
+
+   // Unpack struct scalar result (a two-field {"min", "max"} scalar)
+   const auto& minmax_scalar = \
+         static_cast<const arrow::StructScalar&>(*minmax_datum.scalar());
+   const auto min_value = minmax_scalar.value[0];
+   const auto max_value = minmax_scalar.value[1];
+
+.. seealso::
+   :doc:`Compute API reference <api/compute>`
+
+
+Available functions
+===================
+
+Type categories
+---------------
+
+To avoid exhaustively listing supported types, the tables below use a number
+of general type categories:
+
+* "Numeric": Integer types (Int8, etc.) and Floating-point types (Float32,
+  Float64, sometimes Float16).  Some functions also accept Decimal128 input.
+
+* "Temporal": Date types (Date32, Date64), Time types (Time32, Time64),
+  Timestamp, Duration, Interval.
+
+* "Binary-like": Binary, LargeBinary, sometimes also FixedSizeBinary.
+
+* "String-like": String, LargeString.
+
+* "List-like": List, LargeList, sometimes also FixedSizeList.
+
+If you are unsure whether a function supports a concrete input type, we
+recommend you try it out.  Unsupported input types return a ``TypeError``
+:class:`Status`.
+
+Aggregations
+------------
+
++--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
+| Function name            | Arity      | Input types        | Output type     
      | Options class                              |
++==========================+============+====================+=======================+============================================+
+| count                    | Unary      | Any                | Scalar Int64    
      | :struct:`CountOptions`                     |
++--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
+| mean                     | Unary      | Numeric            | Scalar Float64  
      |                                            |
++--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
+| minmax                   | Unary      | Numeric            | Scalar Struct  
(1)    | :struct:`MinMaxOptions`                    |
++--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
+| sum                      | Unary      | Numeric            | Scalar Numeric 
(2)    |                                            |
++--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
+
+Notes:
+
+* \(1) Output is a ``{"min": input type, "max": input type}`` Struct
+
+* \(2) Output is Int64, UInt64 or Float64, depending on the input type
+
+
+Element-wise ("scalar") functions
+---------------------------------
+
+All element-wise functions accept both arrays and scalars as input.  The
+semantics for unary functions are as follows:
+
+* scalar inputs produce a scalar output
+* array inputs produce an array output
+
+Binary functions have the following semantics (which is sometimes called
+"broadcasting" in other systems such as NumPy):
+
+* ``(scalar, scalar)`` inputs produce a scalar output
+* ``(array, array)`` inputs produce an array output (and both inputs must
+  be of the same length)
+* ``(scalar, array)`` and ``(array, scalar)`` produce an array output.
+  The scalar input is handled as if it were an array of the same length N
+  as the other input, with the same value repeated N times.
+
+Arithmetic functions
+~~~~~~~~~~~~~~~~~~~~
+
+These functions expect two inputs of the same type and apply a given binary
+operation to each pair of elements gathered from the inputs.  If any of the
+input elements in a pair is null, the corresponding output element is null.
+
+The default variant of these functions does not detect overflow (the result
+then typically wraps around).  Each function is also available in an
+overflow-checking variant, suffixed ``_checked``, which returns
+an ``Invalid`` :class:`Status` when overflow is detected.
+
++--------------------------+------------+--------------------+---------------------+
+| Function name            | Arity      | Input types        | Output type     
    |
++==========================+============+====================+=====================+
+| add                      | Binary     | Numeric            | Numeric         
    |
++--------------------------+------------+--------------------+---------------------+
+| add_checked              | Binary     | Numeric            | Numeric         
    |
++--------------------------+------------+--------------------+---------------------+
+| multiply                 | Binary     | Numeric            | Numeric         
    |
++--------------------------+------------+--------------------+---------------------+
+| multiply_checked         | Binary     | Numeric            | Numeric         
    |
++--------------------------+------------+--------------------+---------------------+
+| subtract                 | Binary     | Numeric            | Numeric         
    |
++--------------------------+------------+--------------------+---------------------+
+| subtract_checked         | Binary     | Numeric            | Numeric         
    |
++--------------------------+------------+--------------------+---------------------+
+
+Comparisons
+~~~~~~~~~~~
+
+These functions expect two inputs of the same type and apply a given
+comparison operator.  If any of the input elements in a pair is null,
+the corresponding output element is null.
+
++--------------------------+------------+---------------------------------------------+---------------------+
+| Function names           | Arity      | Input types                          
       | Output type         |
++==========================+============+=============================================+=====================+
+| equal, not_equal         | Binary     | Numeric, Temporal, Binary- and 
String-like  | Boolean             |
++--------------------------+------------+---------------------------------------------+---------------------+
+| greater, greater_equal,  | Binary     | Numeric, Temporal, Binary- and 
String-like  | Boolean             |
+| less, less_equal         |            |                                      
       |                     |
++--------------------------+------------+---------------------------------------------+---------------------+
+
+Logical functions
+~~~~~~~~~~~~~~~~~~
+
+The normal behaviour for these functions is to emit a null if any of the
+inputs is null (similar to the semantics of ``NaN`` in floating-point
+computations).
+
+Some of them are also available in a `Kleene logic`_ variant (suffixed
+``_kleene``) where null is taken to mean "undefined".  This is the
+interpretation of null used in SQL systems as well as R and Julia,
+for example.
+
+For the Kleene logic variants, therefore:
+
+* "true AND null", "null AND true" give "null" (the result is undefined)
+* "true OR null", "null OR true" give "true"
+* "false AND null", "null AND false" give "false"
+* "false OR null", "null OR false" give "null" (the result is undefined)
+
++--------------------------+------------+--------------------+---------------------+
+| Function name            | Arity      | Input types        | Output type     
    |
++==========================+============+====================+=====================+
+| and                      | Binary     | Boolean            | Boolean         
    |
++--------------------------+------------+--------------------+---------------------+
+| and_kleene               | Binary     | Boolean            | Boolean         
    |
++--------------------------+------------+--------------------+---------------------+
+| invert                   | Unary      | Boolean            | Boolean         
    |
++--------------------------+------------+--------------------+---------------------+
+| or                       | Binary     | Boolean            | Boolean         
    |
++--------------------------+------------+--------------------+---------------------+
+| or_kleene                | Binary     | Boolean            | Boolean         
    |
++--------------------------+------------+--------------------+---------------------+
+| xor                      | Binary     | Boolean            | Boolean         
    |
++--------------------------+------------+--------------------+---------------------+
+
+.. _Kleene logic: 
https://en.wikipedia.org/wiki/Three-valued_logic#Kleene_and_Priest_logics
+
+String functions
+~~~~~~~~~~~~~~~~
+
++--------------------------+------------+--------------------+---------------------+---------+
+| Function name            | Arity      | Input types        | Output type     
    | Notes   |
++==========================+============+====================+=====================+=========+
+| ascii_length             | Unary      | String-like        | Int32 or Int64  
    | \(1)    |
++--------------------------+------------+--------------------+---------------------+---------+
+| ascii_lower              | Unary      | String-like        | String-like     
    | \(2)    |
++--------------------------+------------+--------------------+---------------------+---------+
+| ascii_upper              | Unary      | String-like        | String-like     
    | \(2)    |
++--------------------------+------------+--------------------+---------------------+---------+
+| utf8_lower               | Unary      | String-like        | String-like     
    | \(3)    |
++--------------------------+------------+--------------------+---------------------+---------+
+| utf8_upper               | Unary      | String-like        | String-like     
    | \(3)    |
++--------------------------+------------+--------------------+---------------------+---------+
+
+* \(1) Output is the physical length in bytes of each input element.  Output
+  type is Int32 for String, Int64 for LargeString.
+
+* \(2) Each ASCII character in the input is converted to lowercase or
+  uppercase.  Non-ASCII characters are left untouched.
+
+* \(3) Each UTF8-encoded character in the input is converted to lowercase or
+  uppercase.
+
+Containment tests
+~~~~~~~~~~~~~~~~~
+
++--------------------------+------------+------------------------------------+---------------+----------------------------------------+
+| Function name            | Arity      | Input types                        | 
Output type   | Options class                          |
++==========================+============+====================================+===============+========================================+
+| binary_contains_exact    | Unary      | String-like                        | 
Boolean (1)   | :struct:`BinaryContainsExactOptions`   |
++--------------------------+------------+------------------------------------+---------------+----------------------------------------+
+| isin                     | Unary      | Boolean, Null, Numeric, Temporal,  | 
Boolean (2)   | :struct:`SetLookupOptions`             |
+|                          |            | Binary- and String-like            | 
              |                                        |
++--------------------------+------------+------------------------------------+---------------+----------------------------------------+
+| match                    | Unary      | Boolean, Null, Numeric, Temporal,  | 
Int32 (3)     | :struct:`SetLookupOptions`             |
+|                          |            | Binary- and String-like            | 
              |                                        |
++--------------------------+------------+------------------------------------+---------------+----------------------------------------+
+
+* \(1) Output is true iff :member:`BinaryContainsExactOptions::pattern`
+  is a substring of the corresponding input element.
+
+* \(2) Output is true iff the corresponding input element is equal to one
+  of the elements in :member:`SetLookupOptions::value_set`.
+
+* \(3) Output is the index of the corresponding input element in
+  :member:`SetLookupOptions::value_set`, if found there.  Otherwise,
+  output is null.
+
+Structural transforms
+~~~~~~~~~~~~~~~~~~~~~
+
+.. XXX (this category is a bit of a hodgepodge)
+
++--------------------------+------------+---------------------------------------+---------------------+---------+
+| Function name            | Arity      | Input types                          
 | Output type         | Notes   |
++==========================+============+=======================================+=====================+=========+
+| fill_null                | Binary     | Boolean, Null, Numeric, Temporal     
 | Input type          | \(1)    |
++--------------------------+------------+---------------------------------------+---------------------+---------+
+| is_null                  | Unary      | Any                                  
 | Boolean             | \(3)    |
++--------------------------+------------+---------------------------------------+---------------------+---------+
+| is_valid                 | Unary      | Any                                  
 | Boolean             | \(2)    |
++--------------------------+------------+---------------------------------------+---------------------+---------+
+| list_value_lengths       | Unary      | List-like                            
 | Int32 or Int64      | \(4)    |
++--------------------------+------------+---------------------------------------+---------------------+---------+
+
+* \(1) First input must be an array, second input a scalar of the same type.
+  Output is an array of the same type as the inputs, and with the same values
+  as the first input, except for nulls replaced with the second input value.
+
+* \(2) Output is true iff the corresponding input element is non-null.
+
+* \(3) Output is true iff the corresponding input element is null.
+
+* \(4) Each output element is the length of the corresponding input element
+  (null if input is null).  Output type is Int32 for List, Int64 for LargeList.
+
+Conversions
+~~~~~~~~~~~
+
+A general conversion function named ``cast`` is provided which accepts a large
+number of input and output types.  The type to cast to can be passed in a
+:struct:`CastOptions` instance.  As an alternative, the same service is
+provided by a concrete function :func:`~arrow::compute::Cast`.
+
++--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
+| Function name            | Arity      | Input types        | Output type     
      | Options class                              |
++==========================+============+====================+=======================+============================================+
+| cast                     | Unary      | Many               | Variable        
      | :struct:`CastOptions`                      |
++--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
+| strptime                 | Unary      | String-like        | Timestamp       
      | :struct:`StrptimeOptions`                  |
++--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
+
+The conversions available with ``cast`` are listed below.  In all cases, a
+null input value is converted into a null output value.
+
+**Truth value extraction**
+
++-----------------------------+------------------------------------+--------------+
+| Input type                  | Output type                        | Notes     
   |
++=============================+====================================+==============+
+| Binary- and String-like     | Boolean                            | \(1)      
   |
++-----------------------------+------------------------------------+--------------+
+| Numeric                     | Boolean                            | \(2)      
   |
++-----------------------------+------------------------------------+--------------+
+
+* \(1) Output is true iff the corresponding input value has non-zero length.
+
+* \(2) Output is true iff the corresponding input value is non-zero.
+
+**Same-kind conversion**
+
++-----------------------------+------------------------------------+--------------+
+| Input type                  | Output type                        | Notes     
   |
++=============================+====================================+==============+
+| Int32                       | 32-bit Temporal                    | \(1)      
   |
++-----------------------------+------------------------------------+--------------+
+| Int64                       | 64-bit Temporal                    | \(1)      
   |
++-----------------------------+------------------------------------+--------------+
+| (Large)Binary               | (Large)String                      | \(2)      
   |
++-----------------------------+------------------------------------+--------------+
+| (Large)String               | (Large)Binary                      | \(3)      
   |
++-----------------------------+------------------------------------+--------------+
+| Numeric                     | Numeric                            | \(4) \(5) 
   |
++-----------------------------+------------------------------------+--------------+
+| 32-bit Temporal             | Int32                              | \(1)      
   |
++-----------------------------+------------------------------------+--------------+
+| 64-bit Temporal             | Int64                              | \(1)      
   |
++-----------------------------+------------------------------------+--------------+
+| Temporal                    | Temporal                           | \(4) \(5) 
   |
++-----------------------------+------------------------------------+--------------+
+
+* \(1) No-operation cast: the raw values are kept identical, only
+  the type is changed.
+
+* \(2) Validates the contents if :member:`CastOptions::allow_invalid_utf8`
+  is false.
+
+* \(3) No-operation cast: only the type is changed.
+
+* \(4) Overflow and truncation checks are enabled depending on
+  the given :struct:`CastOptions`.
+
+* \(5) Not all such casts have been implemented.
+
+**String representations**
+
++-----------------------------+------------------------------------+---------+
+| Input type                  | Output type                        | Notes   |
++=============================+====================================+=========+
+| Boolean                     | String-like                        |         |
++-----------------------------+------------------------------------+---------+
+| Numeric                     | String-like                        |         |
++-----------------------------+------------------------------------+---------+
+
+**Generic conversions**
+
++-----------------------------+------------------------------------+---------+
+| Input type                  | Output type                        | Notes   |
++=============================+====================================+=========+
+| Dictionary                  | Dictionary value type              | \(1)    |
++-----------------------------+------------------------------------+---------+
+| Extension                   | Extension storage type             |         |
++-----------------------------+------------------------------------+---------+
+| List-like                   | List-like                          | \(2)    |
++-----------------------------+------------------------------------+---------+
+| Null                        | Any                                |         |
++-----------------------------+------------------------------------+---------+
+
+* \(1) The dictionary indices are unchanged, the dictionary values are
+  cast from the input value type to the output value type (if a conversion
+  is available).
+
+* \(2) The list offsets are unchanged, the list values are cast from the
+  input value type to the output value type (if a conversion is
+  available).
+
+
+Array-wise ("vector") functions
+-------------------------------
+
+Associative transforms
+~~~~~~~~~~~~~~~~~~~~~~
+
++--------------------------+------------+------------------------------------+----------------------------+
+| Function name            | Arity      | Input types                        | 
Output type                |
++==========================+============+====================================+============================+
+| dictionary_encode        | Unary      | Boolean, Null, Numeric,            | 
Dictionary (1)             |
+|                          |            | Temporal, Binary- and String-like  | 
                           |
++--------------------------+------------+------------------------------------+----------------------------+
+| unique                   | Unary      | Boolean, Null, Numeric,            | 
Input type (2)             |
+|                          |            | Temporal, Binary- and String-like  | 
                           |
++--------------------------+------------+------------------------------------+----------------------------+
+| value_counts             | Unary      | Boolean, Null, Numeric,            | 
Struct (3)                 |
+|                          |            | Temporal, Binary- and String-like  | 
                           |
++--------------------------+------------+------------------------------------+----------------------------+
+
+* \(1) Output is ``Dictionary(Int32, input type)``.
+
+* \(2) Duplicates are removed from the output while the original order is
+  maintained.
+
+* \(3) Output is a ``{"values": input type, "counts": Int64}`` Struct.
+  Each output element corresponds to a unique value in the input, along
+  with the number of times this value has appeared.
+
+Selections
+~~~~~~~~~~
+
+These functions select a subset of the first input defined by the second input.
+
++-----------------+------------+---------------+--------------+------------------+-------------------------+-------------+
+| Function name   | Arity      | Input type 1  | Input type 2 | Output type      | Options class           | Notes       |
++=================+============+===============+==============+==================+=========================+=============+
+| filter          | Binary     | Any (1)       | Boolean      | Input type 1     | :struct:`FilterOptions` | \(2)        |
++-----------------+------------+---------------+--------------+------------------+-------------------------+-------------+
+| take            | Binary     | Any (1)       | Integer      | Input type 1     | :struct:`TakeOptions`   | \(3)        |
++-----------------+------------+---------------+--------------+------------------+-------------------------+-------------+
+
+* \(1) Unions are unsupported.
+
+* \(2) Each element in input 1 is appended to the output iff the corresponding
+  element in input 2 is true.
+
+* \(3) For each element *i* in input 2, the *i*'th element in input 1 is
+  appended to the output.
+
+Sorts and partitions
+~~~~~~~~~~~~~~~~~~~~
+
+In these functions, nulls are considered greater than any other value
+(they will be sorted or partitioned at the end of the array).
+
++-----------------------+------------+-------------------------+-------------------+--------------------------------+-------------+
+| Function name         | Arity      | Input types             | Output type       | Options class                  | Notes       |
++=======================+============+=========================+===================+================================+=============+
+| partition_indices     | Unary      | Binary- and String-like | UInt64            | :struct:`PartitionOptions`     | \(1) \(3)   |
++-----------------------+------------+-------------------------+-------------------+--------------------------------+-------------+
+| partition_indices     | Unary      | Numeric                 | UInt64            | :struct:`PartitionOptions`     | \(1)        |
++-----------------------+------------+-------------------------+-------------------+--------------------------------+-------------+
+| sort_indices          | Unary      | Binary- and String-like | UInt64            |                                | \(2) \(3)   |
++-----------------------+------------+-------------------------+-------------------+--------------------------------+-------------+
+| sort_indices          | Unary      | Numeric                 | UInt64            |                                | \(2)        |
++-----------------------+------------+-------------------------+-------------------+--------------------------------+-------------+
+
+* \(1) The output is an array of indices into the input array, that define
+  a partition around the *N*'th input array element in sorted order.  *N* is
+  given in :member:`PartitionOptions::pivot`.
+
+* \(2) The output is an array of indices into the input array, that define
+  a non-stable sort of the input array.
+
+* \(3) Input values are ordered lexicographically as bytestrings (even
+  for String arrays).
+
+
+Structural transforms
+~~~~~~~~~~~~~~~~~~~~~
+
++--------------------------+------------+--------------------+---------------------+---------+
+| Function name            | Arity      | Input types        | Output type         | Notes   |
++==========================+============+====================+=====================+=========+
+| list_flatten             | Unary      | List-like          | List value type     | \(1)    |
++--------------------------+------------+--------------------+---------------------+---------+
+| list_parent_indices      | Unary      | List-like          | Int32 or Int64      | \(2)    |
++--------------------------+------------+--------------------+---------------------+---------+
+
+* \(1) The top level of nesting is removed: all values in the list child array,
+  including nulls, are appended to the output.  However, nulls in the parent
+  list array are discarded.
+
+* \(2) For each value in the list child array, the index at which it is found
+  in the list array is appended to the output.  Nulls in the parent list array
+  are discarded.
diff --git a/docs/source/cpp/getting_started.rst b/docs/source/cpp/getting_started.rst
index 5ec0dec..0927e7f 100644
--- a/docs/source/cpp/getting_started.rst
+++ b/docs/source/cpp/getting_started.rst
@@ -30,6 +30,7 @@ User Guide
    arrays
    datatypes
    tables
+   compute
    io
    parquet
    csv
diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst
index fa4902d..81a00d8 100644
--- a/docs/source/python/api/arrays.rst
+++ b/docs/source/python/api/arrays.rst
@@ -75,43 +75,48 @@ may expose data type-specific methods or properties.
 
 .. _api.scalar:
 
-Array Scalars
--------------
+Scalars
+-------
 
-Indexing an array wraps the represented value in a scalar object whose
-concrete type depends on the array data type.  You shouldn't instantiate
-any of those classes directly.
+This function constructs a new Arrow scalar:
+
+.. autosummary::
+   :toctree: ../generated/
+
+   scalar
+
+A scalar's Python class depends on its data type.  Concrete scalar
+classes may expose data type-specific methods or properties.
 
 .. autosummary::
    :toctree: ../generated/
 
    NA
    Scalar
-   ArrayValue
-   BooleanValue
-   Int8Value
-   Int16Value
-   Int32Value
-   Int64Value
-   UInt8Value
-   UInt16Value
-   UInt32Value
-   UInt64Value
-   FloatValue
-   DoubleValue
-   BinaryValue
-   StringValue
-   FixedSizeBinaryValue
-   LargeBinaryValue
-   LargeStringValue
-   Time32Value
-   Time64Value
-   Date32Value
-   Date64Value
-   TimestampValue
-   DecimalValue
-   DictionaryValue
-   ListValue
-   LargeListValue
-   StructValue
-   UnionValue
+   BooleanScalar
+   Int8Scalar
+   Int16Scalar
+   Int32Scalar
+   Int64Scalar
+   UInt8Scalar
+   UInt16Scalar
+   UInt32Scalar
+   UInt64Scalar
+   FloatScalar
+   DoubleScalar
+   BinaryScalar
+   StringScalar
+   FixedSizeBinaryScalar
+   LargeBinaryScalar
+   LargeStringScalar
+   Time32Scalar
+   Time64Scalar
+   Date32Scalar
+   Date64Scalar
+   TimestampScalar
+   Decimal128Scalar
+   DictionaryScalar
+   ListScalar
+   LargeListScalar
+   StructScalar
+   UnionScalar
diff --git a/docs/source/python/dataset.rst b/docs/source/python/dataset.rst
index 33d3123..6c07ad9 100644
--- a/docs/source/python/dataset.rst
+++ b/docs/source/python/dataset.rst
@@ -329,8 +329,8 @@ Reading from Minio
 ------------------
 
 In addition to cloud storage, pyarrow also supports reading from a
-`MinIO https://github.com/minio/minio`_ object storage instance emulating S3
-APIs. Paired with `toxiproxy https://github.com/shopify/toxiproxy`_, this is
+`MinIO <https://github.com/minio/minio>`_ object storage instance emulating S3
+APIs. Paired with `toxiproxy <https://github.com/shopify/toxiproxy>`_, this is
 useful for testing or benchmarking.
 
 .. code-block:: python

[arrow] branch master updated: ARROW-8989: [C++][Doc] Document available compute functions

Reply via email to