This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new bf424e5013 GH-15286: [GLib] Add GArrowIndexOptions (#34679)
bf424e5013 is described below
commit bf424e501360a030c2446a33f3436535fa7139cb
Author: Sutou Kouhei <[email protected]>
AuthorDate: Fri Mar 24 11:51:00 2023 +0900
GH-15286: [GLib] Add GArrowIndexOptions (#34679)
### Rationale for this change
It's needed to use the `index` compute function.
### What changes are included in this PR?
Add `GArrowIndexOptions` and related APIs to make the `index` function easy to use.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* Closes: #15286
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/arrow-glib/compute.cpp | 153 ++++++++++++++++++++++++++-
c_glib/arrow-glib/compute.h | 16 +++
c_glib/arrow-glib/compute.hpp | 7 ++
ruby/red-arrow/lib/arrow/array-computable.rb | 13 +++
ruby/red-arrow/lib/arrow/data-type.rb | 9 ++
ruby/red-arrow/lib/arrow/scalar.rb | 67 ++++++++++++
ruby/red-arrow/test/test-array.rb | 7 ++
ruby/red-arrow/test/test-chunked-array.rb | 9 ++
ruby/red-arrow/test/test-function.rb | 14 +--
ruby/red-arrow/test/test-scalar.rb | 65 ++++++++++++
10 files changed, 348 insertions(+), 12 deletions(-)
diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 987de8d39c..cd0fa71c52 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -230,6 +230,13 @@ G_BEGIN_DECLS
* #GArrowUTF8NormalizeOptions is a class to customize the
* `utf8_normalize` function.
*
+ * #GArrowQuantileOptions is a class to customize the `quantile`
+ * function.
+ *
+ * #GArrowIndexOptions is a class to customize the `index` function.
+ *
+ * #GArrowRankOptions is a class to customize the `rank` function.
+ *
* There are many functions to compute data on an array.
*/
@@ -3890,11 +3897,6 @@ G_DEFINE_TYPE(GArrowQuantileOptions,
garrow_quantile_options,
GARROW_TYPE_FUNCTION_OPTIONS)
-#define GARROW_QUANTILE_OPTIONS_GET_PRIVATE(object) \
- static_cast<GArrowQuantileOptionsPrivate *>( \
- garrow_quantile_options_get_instance_private( \
- GARROW_QUANTILE_OPTIONS(object)))
-
static void
garrow_quantile_options_set_property(GObject *object,
guint prop_id,
@@ -4100,6 +4102,126 @@ garrow_quantile_options_set_qs(GArrowQuantileOptions
*options,
}
+struct GArrowIndexOptionsPrivate {
+ GArrowScalar *value; /* Scalar held for the "value" property; may be NULL. */
+};
+
+enum {
+ PROP_INDEX_OPTIONS_VALUE = 1, /* Property ID of the "value" property. */
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowIndexOptions,
+ garrow_index_options,
+ GARROW_TYPE_FUNCTION_OPTIONS)
+
+#define GARROW_INDEX_OPTIONS_GET_PRIVATE(object) \
+ static_cast<GArrowIndexOptionsPrivate *>( \
+ garrow_index_options_get_instance_private( \
+ GARROW_INDEX_OPTIONS(object)))
+
+/*
+ * GObject "set_property" handler for GArrowIndexOptions.
+ *
+ * For the "value" property, the given scalar replaces the one held in the
+ * private data (the old one is unreferenced, the new one is referenced) and
+ * the wrapped arrow::compute::IndexOptions is updated to match. Any other
+ * property ID produces the standard invalid-property warning.
+ */
+static void
+garrow_index_options_set_property(GObject *object,
+                                  guint prop_id,
+                                  const GValue *value,
+                                  GParamSpec *pspec)
+{
+  auto priv = GARROW_INDEX_OPTIONS_GET_PRIVATE(object);
+  auto arrow_options =
+    garrow_index_options_get_raw(GARROW_INDEX_OPTIONS(object));
+
+  if (prop_id != PROP_INDEX_OPTIONS_VALUE) {
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    return;
+  }
+
+  auto new_value = GARROW_SCALAR(g_value_get_object(value));
+  /* Assigning the scalar that is already held is a no-op. */
+  if (new_value == priv->value) {
+    return;
+  }
+
+  /* Drop the previously held scalar before storing the new one. */
+  if (priv->value) {
+    g_object_unref(priv->value);
+  }
+  priv->value = new_value;
+
+  if (!priv->value) {
+    arrow_options->value = nullptr;
+    return;
+  }
+  g_object_ref(priv->value);
+  arrow_options->value = garrow_scalar_get_raw(new_value);
+}
+
+/*
+ * GObject "get_property" handler for GArrowIndexOptions.
+ *
+ * Stores the scalar held in the private data into @value for the "value"
+ * property; any other property ID produces the standard invalid-property
+ * warning.
+ */
+static void
+garrow_index_options_get_property(GObject *object,
+                                  guint prop_id,
+                                  GValue *value,
+                                  GParamSpec *pspec)
+{
+  auto priv = GARROW_INDEX_OPTIONS_GET_PRIVATE(object);
+
+  if (prop_id == PROP_INDEX_OPTIONS_VALUE) {
+    g_value_set_object(value, priv->value);
+  } else {
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+  }
+}
+
+/*
+ * Instance initializer: installs a default-constructed
+ * arrow::compute::IndexOptions as the options object stored in the
+ * GArrowFunctionOptions private data.
+ */
+static void
+garrow_index_options_init(GArrowIndexOptions *object)
+{
+  arrow::compute::FunctionOptions *arrow_options =
+    new arrow::compute::IndexOptions();
+  auto function_options_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
+  function_options_priv->options = arrow_options;
+}
+
+/*
+ * GObject "dispose" handler for GArrowIndexOptions.
+ *
+ * Releases the reference on the scalar that the "value" property setter
+ * took, so the scalar is not leaked when the options object goes away, and
+ * then chains up to the parent class.
+ */
+static void
+garrow_index_options_dispose(GObject *object)
+{
+  auto priv = GARROW_INDEX_OPTIONS_GET_PRIVATE(object);
+
+  /* dispose may be invoked more than once: clear the pointer after unref. */
+  if (priv->value) {
+    g_object_unref(priv->value);
+    priv->value = nullptr;
+  }
+
+  G_OBJECT_CLASS(garrow_index_options_parent_class)->dispose(object);
+}
+
+/*
+ * Class initializer: wires up the dispose and property handlers and
+ * installs the "value" property.
+ */
+static void
+garrow_index_options_class_init(GArrowIndexOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->dispose = garrow_index_options_dispose;
+  gobject_class->set_property = garrow_index_options_set_property;
+  gobject_class->get_property = garrow_index_options_get_property;
+
+  GParamSpec *spec;
+  /**
+   * GArrowIndexOptions:value:
+   *
+   * The value to be compared.
+   *
+   * Since: 12.0.0
+   */
+  spec = g_param_spec_object("value",
+                             "Value",
+                             "The value to be compared.",
+                             GARROW_TYPE_SCALAR,
+                             static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class,
+                                  PROP_INDEX_OPTIONS_VALUE,
+                                  spec);
+}
+
+/**
+ * garrow_index_options_new:
+ * @value: (nullable): The #GArrowScalar to be compared against.
+ *
+ * Returns: A newly created #GArrowIndexOptions.
+ *
+ * Since: 12.0.0
+ */
+GArrowIndexOptions *
+garrow_index_options_new(GArrowScalar *value)
+{
+  auto options = g_object_new(GARROW_TYPE_INDEX_OPTIONS,
+                              "value", value,
+                              NULL);
+  return GARROW_INDEX_OPTIONS(options);
+}
+
+
enum {
PROP_RANK_OPTIONS_NULL_PLACEMENT = 1,
PROP_RANK_OPTIONS_TIEBREAKER,
@@ -5587,6 +5709,11 @@ garrow_function_options_new_raw(
static_cast<const arrow::compute::QuantileOptions *>(arrow_options);
auto options = garrow_quantile_options_new_raw(arrow_quantile_options);
return GARROW_FUNCTION_OPTIONS(options);
+ } else if (arrow_type_name == "IndexOptions") {
+ const auto arrow_index_options =
+ static_cast<const arrow::compute::IndexOptions *>(arrow_options);
+ auto options = garrow_index_options_new_raw(arrow_index_options);
+ return GARROW_FUNCTION_OPTIONS(options);
} else if (arrow_type_name == "RankOptions") {
const auto arrow_rank_options =
static_cast<const arrow::compute::RankOptions *>(arrow_options);
@@ -5980,6 +6107,22 @@ garrow_quantile_options_get_raw(GArrowQuantileOptions
*options)
}
+/*
+ * Creates a GArrowIndexOptions that mirrors the given raw
+ * arrow::compute::IndexOptions.
+ *
+ * The "value" property setter takes its own reference on the scalar it is
+ * given, so the reference on the temporary wrapper created here (this relies
+ * on garrow_scalar_new_raw() returning a new reference) is released before
+ * returning; otherwise the wrapper would be leaked.
+ */
+GArrowIndexOptions *
+garrow_index_options_new_raw(const arrow::compute::IndexOptions *arrow_options)
+{
+  GArrowScalar *value = nullptr;
+  /* The raw value may have been reset to null; don't dereference it then. */
+  if (arrow_options->value) {
+    auto arrow_value = arrow_options->value->GetSharedPtr();
+    value = garrow_scalar_new_raw(&arrow_value);
+  }
+  auto options = garrow_index_options_new(value);
+  if (value) {
+    g_object_unref(value);
+  }
+  return options;
+}
+
+/*
+ * Returns the raw options object stored in the given GArrowIndexOptions,
+ * cast to arrow::compute::IndexOptions.
+ */
+arrow::compute::IndexOptions *
+garrow_index_options_get_raw(GArrowIndexOptions *options)
+{
+  auto function_options = GARROW_FUNCTION_OPTIONS(options);
+  auto arrow_function_options =
+    garrow_function_options_get_raw(function_options);
+  return static_cast<arrow::compute::IndexOptions *>(arrow_function_options);
+}
+
+
GArrowRankOptions *
garrow_rank_options_new_raw(const arrow::compute::RankOptions *arrow_options)
{
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 673018137d..1003b4dd60 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -806,6 +806,22 @@ garrow_quantile_options_set_qs(GArrowQuantileOptions
*options,
gsize n);
+#define GARROW_TYPE_INDEX_OPTIONS (garrow_index_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowIndexOptions,
+ garrow_index_options,
+ GARROW,
+ INDEX_OPTIONS,
+ GArrowFunctionOptions)
+struct _GArrowIndexOptionsClass
+{
+ GArrowFunctionOptionsClass parent_class; /* Only member: the parent class structure. */
+};
+
+GARROW_AVAILABLE_IN_12_0
+GArrowIndexOptions *
+garrow_index_options_new(GArrowScalar *value);
+
+
/**
* GArrowRankTiebreader:
* @GARROW_RANK_TIEBREAKER_MIN:
diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp
index 025db98dc6..50bc89a1e2 100644
--- a/c_glib/arrow-glib/compute.hpp
+++ b/c_glib/arrow-glib/compute.hpp
@@ -165,6 +165,13 @@ arrow::compute::QuantileOptions *
garrow_quantile_options_get_raw(GArrowQuantileOptions *options);
+GArrowIndexOptions *
+garrow_index_options_new_raw(
+ const arrow::compute::IndexOptions *arrow_options);
+arrow::compute::IndexOptions *
+garrow_index_options_get_raw(GArrowIndexOptions *options);
+
+
GArrowRankOptions *
garrow_rank_options_new_raw(const arrow::compute::RankOptions *arrow_options);
arrow::compute::RankOptions *
diff --git a/ruby/red-arrow/lib/arrow/array-computable.rb
b/ruby/red-arrow/lib/arrow/array-computable.rb
index 2532da537f..cac603e94f 100644
--- a/ruby/red-arrow/lib/arrow/array-computable.rb
+++ b/ruby/red-arrow/lib/arrow/array-computable.rb
@@ -29,6 +29,19 @@ module Arrow
unique.values
end
+ # Finds the index of the first occurrence of a given value.
+ #
+ # @param value [Object] The value to be compared.
+ #
+ # @return [Integer] The index of the first occurrence of a given
+ # value on found, -1 on not found.
+ #
+ # @since 12.0.0
+ def index(value)
+ value = Scalar.resolve(value, value_data_type)
+ compute("index", options: {value: value}).value
+ end
+
private
def compute(name, options: nil)
Function.find(name).execute([self], options).value
diff --git a/ruby/red-arrow/lib/arrow/data-type.rb
b/ruby/red-arrow/lib/arrow/data-type.rb
index a95212780e..879b31d403 100644
--- a/ruby/red-arrow/lib/arrow/data-type.rb
+++ b/ruby/red-arrow/lib/arrow/data-type.rb
@@ -199,5 +199,14 @@ module Arrow
args.unshift(self) unless builder_class.buildable?(args)
builder_class.build(*args)
end
+
+ # @return [Arrow::Scalar} A corresponding {Arrow::Scalar} class
+ # for this data type.
+ #
+ # @since 12.0.0
+ def scalar_class
+ base_name = self.class.name.gsub(/DataType\z/, "")
+ ::Arrow.const_get("#{base_name}Scalar")
+ end
end
end
diff --git a/ruby/red-arrow/lib/arrow/scalar.rb
b/ruby/red-arrow/lib/arrow/scalar.rb
index b2bf1ac596..0606a60843 100644
--- a/ruby/red-arrow/lib/arrow/scalar.rb
+++ b/ruby/red-arrow/lib/arrow/scalar.rb
@@ -17,6 +17,73 @@
module Arrow
class Scalar
+ class << self
+ # @api private
+ def try_convert(value)
+ case value
+ when self
+ value
+ when true, false
+ BooleanScalar.new(value)
+ when Symbol, String
+ StringScalar.new(value.to_s)
+ when Integer
+ Int64Scalar.new(value)
+ when Float
+ DoubleScalar.new(value)
+ else
+ nil
+ end
+ end
+
+ # Ensure returning suitable {Arrow::Scalar}.
+ #
+ # @overload resolve(scalar)
+ #
+ # Returns the given scalar itself. This is convenient to
+ # use this method as {Arrow::Scalar} converter.
+ #
+ # @param scalar [Arrow::Scalar] The scalar.
+ #
+ # @return [Arrow::Scalar] The given scalar itself.
+ #
+ # @overload resolve(value)
+ #
+ # Creates a suitable scalar from the given value. For example,
+ # you can create {Arrow::BooleanScalar} from `true`.
+ #
+ # @param value [Object] The value.
+ #
+ # @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
+ #
+ # @overload resolve(value, data_type)
+ #
+ # Creates a scalar of `data_type.scalar_class` from the given
+ # value. For example, you can create {Arrow::Int32Scalar} from
+ # `29` and {Arrow::Int32DataType}.
+ #
+ # @param value [Object] The value.
+ #
+ # @param data_type [Arrow::DataType] The {Arrow::DataType} to
+ # decide the returned scalar class.
+ #
+ # @return [Arrow::Scalar] A suitable {Arrow::Scalar} for `value`.
+ #
+ # @since 12.0.0
+ def resolve(value, data_type=nil)
+ return try_convert(value) if data_type.nil?
+
+ data_type = DataType.resolve(data_type)
+ scalar_class = data_type.scalar_class
+ case value
+ when Scalar
+ return value if value.class == scalar_class
+ value = value.value
+ end
+ scalar_class.new(value)
+ end
+ end
+
# @param other [Arrow::Scalar] The scalar to be compared.
# @param options [Arrow::EqualOptions, Hash] (nil)
# The options to custom how to compare.
diff --git a/ruby/red-arrow/test/test-array.rb
b/ruby/red-arrow/test/test-array.rb
index 2b7112da6f..6dbf7e52e8 100644
--- a/ruby/red-arrow/test/test-array.rb
+++ b/ruby/red-arrow/test/test-array.rb
@@ -322,4 +322,11 @@ class ArrayTest < Test::Unit::TestCase
end
end
end
+
+ sub_test_case("#index") do
+ test("Integer") do
+ assert_equal(2,
+ Arrow::Int32Array.new([1, 2, 3, 4, 5]).index(3))
+ end
+ end
end
diff --git a/ruby/red-arrow/test/test-chunked-array.rb
b/ruby/red-arrow/test/test-chunked-array.rb
index c08613823b..7c23cee7f7 100644
--- a/ruby/red-arrow/test/test-chunked-array.rb
+++ b/ruby/red-arrow/test/test-chunked-array.rb
@@ -186,4 +186,13 @@ class ChunkedArrayTest < Test::Unit::TestCase
assert_equal(Arrow::ChunkedArray.new([["1", nil, "3"]]),
chunked_array.cast(:string))
end
+
+ test("#index") do
+ arrays = [
+ Arrow::Int32Array.new([1, 2]),
+ Arrow::Int32Array.new([3, 4, 5]),
+ ]
+ chunked_array = Arrow::ChunkedArray.new(arrays)
+ assert_equal(2, chunked_array.index(3))
+ end
end
diff --git a/ruby/red-arrow/test/test-function.rb
b/ruby/red-arrow/test/test-function.rb
index 8232aeca0a..35aa246137 100644
--- a/ruby/red-arrow/test/test-function.rb
+++ b/ruby/red-arrow/test/test-function.rb
@@ -199,12 +199,12 @@ class FunctionTest < Test::Unit::TestCase
end
def test_call
- or_function = Arrow::Function.find("or")
- args = [
- Arrow::BooleanArray.new([true, false, false]),
- Arrow::BooleanArray.new([true, false, true]),
- ]
- assert_equal([true, false, true],
- or_function.call(args).value.to_a)
+ or_function = Arrow::Function.find("or")
+ args = [
+ Arrow::BooleanArray.new([true, false, false]),
+ Arrow::BooleanArray.new([true, false, true]),
+ ]
+ assert_equal([true, false, true],
+ or_function.call(args).value.to_a)
end
end
diff --git a/ruby/red-arrow/test/test-scalar.rb
b/ruby/red-arrow/test/test-scalar.rb
new file mode 100644
index 0000000000..34d2a9b34a
--- /dev/null
+++ b/ruby/red-arrow/test/test-scalar.rb
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class ScalarTest < Test::Unit::TestCase
+ sub_test_case(".resolve") do
+ test("Scalar") do
+ assert_equal(Arrow::Int32Scalar.new(29),
+ Arrow::Scalar.resolve(Arrow::Int32Scalar.new(29)))
+ end
+
+ test("true") do
+ assert_equal(Arrow::BooleanScalar.new(true),
+ Arrow::Scalar.resolve(true))
+ end
+
+ test("false") do
+ assert_equal(Arrow::BooleanScalar.new(false),
+ Arrow::Scalar.resolve(false))
+ end
+
+ test("Symbol") do
+ assert_equal(Arrow::StringScalar.new("hello"),
+ Arrow::Scalar.resolve(:hello))
+ end
+
+ test("String") do
+ assert_equal(Arrow::StringScalar.new("hello"),
+ Arrow::Scalar.resolve("hello"))
+ end
+
+ test("Integer") do
+ assert_equal(Arrow::Int64Scalar.new(-29),
+ Arrow::Scalar.resolve(-29))
+ end
+
+ test("Float") do
+ assert_equal(Arrow::DoubleScalar.new(2.9),
+ Arrow::Scalar.resolve(2.9))
+ end
+
+ test("Int64Scalar, :int32") do
+ assert_equal(Arrow::Int32Scalar.new(-29),
+ Arrow::Scalar.resolve(Arrow::Int64Scalar.new(-29), :int32))
+ end
+
+ test("Integer, :int32") do
+ assert_equal(Arrow::Int32Scalar.new(-29),
+ Arrow::Scalar.resolve(-29, :int32))
+ end
+ end
+end