pitrou commented on code in PR #46261:
URL: https://github.com/apache/arrow/pull/46261#discussion_r2097667432


##########
c_glib/arrow-glib/compute.cpp:
##########
@@ -37,6 +37,9 @@
 #include <arrow/acero/exec_plan.h>
 #include <arrow/acero/options.h>
 
+// Initialize the compute library and register compute kernels.
+auto compute_init_status_ = arrow::compute::Initialize();

Review Comment:
   This makes it a public symbol, should it be private instead?
   ```suggestion
   static auto compute_init_status_ = arrow::compute::Initialize();
   ```
   



##########
python/pyarrow/_compute.pyx:
##########
@@ -70,6 +70,10 @@ def _forbid_instantiation(klass, subclasses_instead=True):
     raise TypeError(msg)
 
 
+# Call to initialize the compute module (register kernels) on import
+CInitializeCompute()

Review Comment:
   Shouldn't you call `check_status` on the result?



##########
python/CMakeLists.txt:
##########
@@ -643,12 +656,14 @@ get_filename_component(ARROW_INCLUDE_ARROW_DIR_REAL 
${ARROW_INCLUDE_DIR}/arrow R
 install(DIRECTORY ${ARROW_INCLUDE_ARROW_DIR_REAL} DESTINATION 
${CMAKE_INSTALL_INCLUDEDIR})
 
 if(PYARROW_BUNDLE_ARROW_CPP)
-  # Arrow
+  # Arrow and Compute
   bundle_arrow_lib(${ARROW_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
+  bundle_arrow_lib(${ARROW_COMPUTE_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
 
   if(MSVC)
     # TODO(kszucs): locate msvcp140.dll in a portable fashion and bundle it

Review Comment:
   Can we remove this TODO?



##########
cpp/src/arrow/compute/kernels/registry.cc:
##########
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include "arrow/compute/registry.h"
+#include "arrow/compute/kernels/registry.h"
+
+#include <algorithm>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <utility>
+
+#include "arrow/compute/function.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/registry_internal.h"
+#include "arrow/status.h"
+#include "arrow/util/config.h"  // For ARROW_COMPUTE
+#include "arrow/util/logging.h"
+
+namespace arrow::compute {
+namespace internal {
+
+Status RegisterComputeKernels() {
+  auto registry = GetFunctionRegistry();
+
+  // Check if a compute kernels function is already registered
+  // to avoid multiple registration attempts.
+  auto func = registry->GetFunction("rank");
+  if (func.ok()) {
+    return Status::OK();
+  }
+
+  // Register additional kernels on libarrow_compute
+  // Scalar functions
+  internal::RegisterScalarArithmetic(registry);
+  internal::RegisterScalarBoolean(registry);
+  internal::RegisterScalarComparison(registry);
+  internal::RegisterScalarIfElse(registry);
+  internal::RegisterScalarNested(registry);
+  internal::RegisterScalarRandom(registry);  // Nullary
+  internal::RegisterScalarRoundArithmetic(registry);
+  internal::RegisterScalarSetLookup(registry);
+  internal::RegisterScalarStringAscii(registry);
+  internal::RegisterScalarStringUtf8(registry);
+  internal::RegisterScalarTemporalBinary(registry);
+  internal::RegisterScalarTemporalUnary(registry);
+  internal::RegisterScalarValidity(registry);
+
+  // Vector functions
+  internal::RegisterVectorArraySort(registry);
+  internal::RegisterVectorCumulativeSum(registry);
+  internal::RegisterVectorNested(registry);
+  internal::RegisterVectorRank(registry);
+  internal::RegisterVectorReplace(registry);
+  internal::RegisterVectorSelectK(registry);
+  internal::RegisterVectorSort(registry);
+  internal::RegisterVectorRunEndEncode(registry);
+  internal::RegisterVectorRunEndDecode(registry);
+  internal::RegisterVectorPairwise(registry);
+  internal::RegisterVectorStatistics(registry);
+  internal::RegisterVectorSwizzle(registry);
+
+  // Aggregate functions
+  internal::RegisterHashAggregateBasic(registry);
+  internal::RegisterHashAggregateNumeric(registry);
+  internal::RegisterHashAggregatePivot(registry);
+  internal::RegisterScalarAggregateBasic(registry);
+  internal::RegisterScalarAggregateMode(registry);
+  internal::RegisterScalarAggregatePivot(registry);
+  internal::RegisterScalarAggregateQuantile(registry);
+  internal::RegisterScalarAggregateTDigest(registry);
+  internal::RegisterScalarAggregateVariance(registry);
+
+  return Status::OK();
+}
+
+}  // namespace internal
+
+Status Initialize() { return internal::RegisterComputeKernels(); }

Review Comment:
   Should we arrange for this to be multi-thread safe? People may want to call 
it lazily.
   



##########
cpp/src/arrow/compute/codegen_internal.h:
##########
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/api_vector.h"
+#include "arrow/result.h"
+#include "arrow/type_fwd.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+ARROW_EXPORT Result<TypeHolder> FirstType(KernelContext*,

Review Comment:
   Why is this function moved here but not the other similar ones in 
`kernels/codegen_internal.h`?



##########
docs/source/cpp/compute.rst:
##########
@@ -42,6 +43,16 @@ whether the inputs are integral or floating-point).
 Functions are stored in a global :class:`FunctionRegistry` where
 they can be looked up by name.
 
+Compute Initialization
+----------------------
+
+The compute library requires a call to :func:`arrow::compute::Initialize`
+in order to register the individual functions into the 
:class:`FunctionRegistry`, otherwise
+only the functions required for Arrow core will be available  \(1).

Review Comment:
   ```suggestion
   only the functions required for Arrow core functionality will be available  
\(1).
   ```



##########
docs/source/cpp/compute.rst:
##########
@@ -42,6 +43,16 @@ whether the inputs are integral or floating-point).
 Functions are stored in a global :class:`FunctionRegistry` where
 they can be looked up by name.
 
+Compute Initialization
+----------------------
+
+The compute library requires a call to :func:`arrow::compute::Initialize`
+in order to register the individual functions into the 
:class:`FunctionRegistry`, otherwise
+only the functions required for Arrow core will be available  \(1).
+
+\(1) Those are ``add``, ``array_filter``, ``array_take``, 
``dictionary_encode``, ``dictionary_decode``,
+``indices_nonzero``, ``unique`` and ``value_counts``.

Review Comment:
   I'm not sure we want to list those. Users should not rely on it.



##########
cpp/src/arrow/compute/kernels/api.h:
##########
@@ -0,0 +1,20 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/compute/kernels/registry.h"

Review Comment:
   Why not move this into `arrow/compute/api.h`? Is there any benefit in having 
this separate file?



##########
r/src/compute.cpp:
##########
@@ -618,6 +622,9 @@ SEXP compute__CallFunction(std::string func_name, 
cpp11::list args, cpp11::list
 
 // [[arrow::export]]
 std::vector<std::string> compute__GetFunctionNames() {
+#if ARROW_VERSION_MAJOR >= 21
+  auto compute_init_status_ = arrow::compute::Initialize();

Review Comment:
   How about checking that no error occurred?



##########
cpp/examples/tutorial_examples/dataset_example.cc:
##########
@@ -75,6 +76,8 @@ arrow::Result<std::string> CreateExampleParquetDataset(
 }
 
 arrow::Status PrepareEnv() {
+  // Initilize the compute module to register the required kernels for Dataset
+  ARROW_RETURN_NOT_OK(arrow::compute::Initialize());

Review Comment:
   Should this be done implicitly by the Dataset layer? Or is it not possible? 
cc @bkietz 



##########
cpp/src/arrow/compute/codegen_internal.cc:
##########
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/codegen_internal.h"
+
+#include "arrow/compute/api_vector.h"
+#include "arrow/result.h"
+#include "arrow/type_fwd.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {

Review Comment:
   Can use nested notation here.
   ```suggestion
   namespace arrow::compute::internal {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to