[GitHub] [arrow-cookbook] drin commented on a diff in pull request #227: Adding recipe for custom compute functions

GitBox Wed, 27 Jul 2022 12:33:48 -0700


drin commented on code in PR #227:
URL: https://github.com/apache/arrow-cookbook/pull/227#discussion_r931511694



##########
cpp/code/compute_fn.cc:
##########
@@ -0,0 +1,270 @@
+// ------------------------------
+// Dependencies
+
+// standard dependencies
+#include <stdint.h>
+#include <string>
+#include <iostream>
+
+// arrow dependencies
+#include <arrow/api.h>
+#include <arrow/compute/api.h>
+#include <arrow/compute/exec/key_hash.h>
+
+#include "common.h"
+
+
+// >> aliases for types in standard library
+using std::shared_ptr;
+using std::vector;
+
+// arrow util types
+using arrow::Result;
+using arrow::Status;
+using arrow::Datum;
+
+// arrow data types and helpers
+using arrow::UInt32Builder;
+using arrow::Int32Builder;
+
+using arrow::Array;
+using arrow::ArraySpan;
+
+
+// aliases for types used in `NamedScalarFn`
+//    |> kernel parameters
+using arrow::compute::KernelContext;
+using arrow::compute::ExecSpan;
+using arrow::compute::ExecResult;
+
+//    |> other context types
+using arrow::compute::ExecContext;
+using arrow::compute::LightContext;
+
+//    |> common types for compute functions
+using arrow::compute::FunctionRegistry;
+using arrow::compute::FunctionDoc;
+using arrow::compute::InputType;
+using arrow::compute::OutputType;
+using arrow::compute::Arity;
+
+//    |> the "kind" of function we want
+using arrow::compute::ScalarFunction;
+
+//    |> structs and classes for hashing
+using arrow::util::MiniBatch;
+using arrow::util::TempVectorStack;
+
+using arrow::compute::KeyColumnArray;
+using arrow::compute::Hashing32;
+
+//    |> functions used for hashing
+using arrow::compute::ColumnArrayFromArrayData;
+
+
+// ------------------------------
+// Structs and Classes
+
+// >> Documentation for a compute function
+/**
+ * Create a const instance of `FunctionDoc` that contains 3 attributes:
+ *  1. Short description
+ *  2. Long  description (limited to 78 characters)
+ *  3. Name of input arguments
+ */
+const FunctionDoc named_scalar_fn_doc {
+   "Unary function that calculates a hash for each row of the input"
+  ,"This function uses an xxHash-like algorithm which produces 32-bit hashes."
+  ,{ "input_array" }
+};
+
+
+// >> Kernel implementations for a compute function
+/**
+ * Create implementations that will be associated with our compute function. When a
+ * compute function is invoked, the compute API framework will delegate execution to an
+ * associated kernel that matches: (1) input argument types/shapes and (2) output argument
+ * types/shapes.
+ *
+ * Kernel implementations may be functions or may be methods (functions within a class or
+ * struct).
+ */
+struct NamedScalarFn {
+
+  /**
+   * A kernel implementation that expects a single array as input, and outputs an array of
+   * uint32 values. We write this implementation knowing what function we want to
+   * associate it with ("NamedScalarFn"), but that association is made later (see
+   * `RegisterScalarFnKernels()` below).
+   */
+  static Status
+  Exec(KernelContext *ctx, const ExecSpan &input_arg, ExecResult *out) {
+    StartRecipe("DefineAComputeKernel");
+
+    if (input_arg.num_values() != 1 or not input_arg[0].is_array()) {
+      return Status::Invalid("Unsupported argument types or shape");
+    }
+
+    // >> Initialize stack-based memory allocator with an allocator and memory size
+    TempVectorStack stack_memallocator;
+    auto            input_dtype_width = input_arg[0].type()->bit_width();
+    if (input_dtype_width > 0) {
+      ARROW_RETURN_NOT_OK(
+        stack_memallocator.Init(
+           ctx->exec_context()->memory_pool()
+          ,input_dtype_width * max_batchsize
+        )
+      );
+    }

Review Comment:
   I guess this recipe can just be specific to a single data type for
conciseness, so I'll remove this. I may add another recipe that shows how
to work with various data types.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow-cookbook] drin commented on a diff in pull request #227: Adding recipe for custom compute functions

Reply via email to