This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 0b8fb56cba GH-48508: [GLib][Ruby] Add TDigestOptions (#48529)
0b8fb56cba is described below
commit 0b8fb56cba732dd9ef044aab67163282b4165de4
Author: Sten Larsson <[email protected]>
AuthorDate: Thu Jan 1 07:42:08 2026 +0100
GH-48508: [GLib][Ruby] Add TDigestOptions (#48529)
### Rationale for this change
The `TDigestOptions` class, which is used to customize the `tdigest`
compute function, is not yet available in the GLib/Ruby bindings.
### What changes are included in this PR?
This adds the `TDigestOptions` class to GLib.
### Are these changes tested?
Yes, with Ruby unit tests.
### Are there any user-facing changes?
Yes, a new class: `GArrowTDigestOptions` (`Arrow::TDigestOptions` in Ruby).
* GitHub Issue: #48508
Authored-by: Sten Larsson <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/arrow-glib/compute.cpp | 252 +++++++++++++++++++++++++++++++++++-
c_glib/arrow-glib/compute.h | 25 ++++
c_glib/arrow-glib/compute.hpp | 5 +
c_glib/test/test-tdigest-options.rb | 71 ++++++++++
4 files changed, 350 insertions(+), 3 deletions(-)
diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index db35b9a89e..7e786c870c 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -259,8 +259,8 @@ G_BEGIN_DECLS
* such as `cumulative_sum`, `cumulative_prod`, `cumulative_max`, and
* `cumulative_min`.
*
- * #GArrowDictionaryEncodeOptions is a class to customize the `dictionary_encode`
- * function.
+ * #GArrowDictionaryEncodeOptions is a class to customize the
+ * `dictionary_encode` function.
*
* #GArrowElementWiseAggregateOptions is a class to customize element-wise
* aggregate functions such as `min_element_wise` and `max_element_wise`.
@@ -299,7 +299,6 @@ G_BEGIN_DECLS
* #GArrowReplaceSliceOptions is a class to customize the
* `utf8_replace_slice` and `binary_replace_slice` functions.
*
- *
* #GArrowPartitionNthOptions is a class to customize the
* `partition_nth_indices` function.
*
@@ -327,6 +326,9 @@ G_BEGIN_DECLS
* #GArrowSliceOptions is a class to customize the `utf8_slice_codeunits` and
* `binary_slice` functions.
*
+ * #GArrowTDigestOptions is a class to customize the `tdigest` and
+ * `hash_tdigest` functions.
+ *
* There are many functions to compute data on an array.
*/
@@ -9781,6 +9783,219 @@ garrow_slice_options_new(void)
return GARROW_SLICE_OPTIONS(g_object_new(GARROW_TYPE_SLICE_OPTIONS,
nullptr));
}
+/* Property IDs for GArrowTDigestOptions. Each ID is installed in
+ * garrow_tdigest_options_class_init() and dispatched on in the
+ * set_property/get_property handlers. */
+enum {
+  PROP_TDIGEST_OPTIONS_DELTA = 1,
+  PROP_TDIGEST_OPTIONS_BUFFER_SIZE = 2,
+  PROP_TDIGEST_OPTIONS_SKIP_NULLS = 3,
+  PROP_TDIGEST_OPTIONS_MIN_COUNT = 4,
+};
+
+// Defines the GArrowTDigestOptions type with GArrowFunctionOptions as its parent.
+G_DEFINE_TYPE(GArrowTDigestOptions, garrow_tdigest_options, GARROW_TYPE_FUNCTION_OPTIONS)
+
+/* GObject "set_property" handler: copies the incoming GValue into the
+ * matching field of the wrapped arrow::compute::TDigestOptions. Unknown
+ * property IDs are reported through G_OBJECT_WARN_INVALID_PROPERTY_ID(). */
+static void
+garrow_tdigest_options_set_property(GObject *object,
+                                    guint prop_id,
+                                    const GValue *value,
+                                    GParamSpec *pspec)
+{
+  auto options = garrow_tdigest_options_get_raw(GARROW_TDIGEST_OPTIONS(object));
+
+  switch (prop_id) {
+  case PROP_TDIGEST_OPTIONS_DELTA:
+    options->delta = g_value_get_uint(value);
+    break;
+  case PROP_TDIGEST_OPTIONS_BUFFER_SIZE:
+    options->buffer_size = g_value_get_uint(value);
+    break;
+  case PROP_TDIGEST_OPTIONS_SKIP_NULLS:
+    options->skip_nulls = g_value_get_boolean(value);
+    break;
+  case PROP_TDIGEST_OPTIONS_MIN_COUNT:
+    options->min_count = g_value_get_uint(value);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* GObject "get_property" handler: reads the requested field from the
+ * wrapped arrow::compute::TDigestOptions and stores it in @value. Unknown
+ * property IDs are reported through G_OBJECT_WARN_INVALID_PROPERTY_ID(). */
+static void
+garrow_tdigest_options_get_property(GObject *object,
+                                    guint prop_id,
+                                    GValue *value,
+                                    GParamSpec *pspec)
+{
+  auto options = garrow_tdigest_options_get_raw(GARROW_TDIGEST_OPTIONS(object));
+
+  switch (prop_id) {
+  case PROP_TDIGEST_OPTIONS_DELTA:
+    g_value_set_uint(value, options->delta);
+    break;
+  case PROP_TDIGEST_OPTIONS_BUFFER_SIZE:
+    g_value_set_uint(value, options->buffer_size);
+    break;
+  case PROP_TDIGEST_OPTIONS_SKIP_NULLS:
+    g_value_set_boolean(value, options->skip_nulls);
+    break;
+  case PROP_TDIGEST_OPTIONS_MIN_COUNT:
+    g_value_set_uint(value, options->min_count);
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+/* Instance initializer: stores a default-constructed
+ * arrow::compute::TDigestOptions in the parent's private data.
+ * NOTE(review): the object is allocated with `new` and is not released in
+ * this block; presumably the parent class frees it - confirm. */
+static void
+garrow_tdigest_options_init(GArrowTDigestOptions *object)
+{
+  auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
+  arrow_priv->options =
+    static_cast<arrow::compute::FunctionOptions *>(new arrow::compute::TDigestOptions());
+}
+
+/* Class initializer: hooks up the property accessors and installs the four
+ * scalar properties. Each property's default value is taken from
+ * arrow::compute::TDigestOptions::Defaults(). */
+static void
+garrow_tdigest_options_class_init(GArrowTDigestOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->set_property = garrow_tdigest_options_set_property;
+  gobject_class->get_property = garrow_tdigest_options_get_property;
+
+  auto options = arrow::compute::TDigestOptions::Defaults();
+
+  GParamSpec *spec;
+  /**
+   * GArrowTDigestOptions:delta:
+   *
+   * Compression parameter, default 100.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_uint("delta",
+                           "Delta",
+                           "Compression parameter, default 100",
+                           0,
+                           G_MAXUINT32,
+                           options.delta,
+                           static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_TDIGEST_OPTIONS_DELTA, spec);
+
+  /**
+   * GArrowTDigestOptions:buffer-size:
+   *
+   * Input buffer size, default 500.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_uint("buffer-size",
+                           "Buffer size",
+                           "Input buffer size, default 500",
+                           0,
+                           G_MAXUINT32,
+                           options.buffer_size,
+                           static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_TDIGEST_OPTIONS_BUFFER_SIZE, spec);
+
+  /**
+   * GArrowTDigestOptions:skip-nulls:
+   *
+   * If true (the default), null values are ignored. Otherwise, if any
+   * value is null, emit null.
+   *
+   * Since: 23.0.0
+   */
+  spec = g_param_spec_boolean("skip-nulls",
+                              "Skip nulls",
+                              "If true (the default), null values are ignored. "
+                              "Otherwise, if any value is null, emit null.",
+                              options.skip_nulls,
+                              static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_TDIGEST_OPTIONS_SKIP_NULLS, spec);
+
+  /**
+   * GArrowTDigestOptions:min-count:
+   *
+   * If less than this many non-null values are observed, emit null.
+   *
+   * Since: 23.0.0
+   */
+  spec =
+    g_param_spec_uint("min-count",
+                      "Min count",
+                      "If less than this many non-null values are observed, emit null",
+                      0,
+                      G_MAXUINT32,
+                      options.min_count,
+                      static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class, PROP_TDIGEST_OPTIONS_MIN_COUNT, spec);
+}
+
+/**
+ * garrow_tdigest_options_new:
+ *
+ * Returns: A newly created #GArrowTDigestOptions.
+ *
+ * Since: 23.0.0
+ */
+GArrowTDigestOptions *
+garrow_tdigest_options_new(void)
+{
+  // No construct properties are passed, so the wrapped options keep the
+  // values set up by garrow_tdigest_options_init().
+  return GARROW_TDIGEST_OPTIONS(g_object_new(GARROW_TYPE_TDIGEST_OPTIONS, nullptr));
+}
+
+/**
+ * garrow_tdigest_options_get_qs:
+ * @options: A #GArrowTDigestOptions.
+ * @n: (out): The number of `q`s.
+ *
+ * Returns: (array length=n) (transfer none): The `q`s to be used.
+ *
+ * Since: 23.0.0
+ */
+const gdouble *
+garrow_tdigest_options_get_qs(GArrowTDigestOptions *options, gsize *n)
+{
+  const auto &qs = garrow_tdigest_options_get_raw(options)->q;
+  // @n is only written when the caller supplied a location for it.
+  if (n != nullptr) {
+    *n = qs.size();
+  }
+  // The returned pointer refers to storage held by the wrapped options.
+  return qs.data();
+}
+
+/**
+ * garrow_tdigest_options_set_q:
+ * @options: A #GArrowTDigestOptions.
+ * @q: A `q` to be used.
+ *
+ * Since: 23.0.0
+ */
+void
+garrow_tdigest_options_set_q(GArrowTDigestOptions *options, gdouble q)
+{
+  auto arrow_options = garrow_tdigest_options_get_raw(options);
+  // Replace whatever `q`s were set before with this single value.
+  arrow_options->q = {q};
+}
+
+/**
+ * garrow_tdigest_options_set_qs:
+ * @options: A #GArrowTDigestOptions.
+ * @qs: (array length=n): `q`s to be used.
+ * @n: The number of @qs.
+ *
+ * Since: 23.0.0
+ */
+void
+garrow_tdigest_options_set_qs(GArrowTDigestOptions *options, const gdouble *qs, gsize n)
+{
+  auto priv = garrow_tdigest_options_get_raw(options);
+  // Replaces the previous contents with a copy of the @n values in @qs;
+  // with @n == 0 the range is empty and the vector ends up empty.
+  priv->q.assign(qs, qs + n);
+}
+
G_END_DECLS
arrow::Result<arrow::FieldRef>
@@ -10036,6 +10251,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt
static_cast<const arrow::compute::SliceOptions *>(arrow_options);
auto options = garrow_slice_options_new_raw(arrow_slice_options);
return GARROW_FUNCTION_OPTIONS(options);
+ } else if (arrow_type_name == "TDigestOptions") {
+ const auto arrow_tdigest_options =
+ static_cast<const arrow::compute::TDigestOptions *>(arrow_options);
+ auto options = garrow_tdigest_options_new_raw(arrow_tdigest_options);
+ return GARROW_FUNCTION_OPTIONS(options);
} else {
auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL);
return GARROW_FUNCTION_OPTIONS(options);
@@ -11071,3 +11291,29 @@ garrow_slice_options_get_raw(GArrowSliceOptions *options)
return static_cast<arrow::compute::SliceOptions *>(
garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
}
+
+/* Builds a GArrowTDigestOptions whose wrapped options mirror @arrow_options:
+ * the scalar fields are passed as construct properties and the `q` values
+ * are copied afterwards through garrow_tdigest_options_set_qs(). */
+GArrowTDigestOptions *
+garrow_tdigest_options_new_raw(const arrow::compute::TDigestOptions *arrow_options)
+{
+  auto options = GARROW_TDIGEST_OPTIONS(g_object_new(GARROW_TYPE_TDIGEST_OPTIONS,
+                                                     "delta",
+                                                     arrow_options->delta,
+                                                     "buffer-size",
+                                                     arrow_options->buffer_size,
+                                                     "skip-nulls",
+                                                     arrow_options->skip_nulls,
+                                                     "min-count",
+                                                     arrow_options->min_count,
+                                                     nullptr));
+  // `q` is not exposed as a GObject property, so it is set separately.
+  garrow_tdigest_options_set_qs(options,
+                                arrow_options->q.data(),
+                                arrow_options->q.size());
+  return options;
+}
+
+/* Returns the arrow::compute::TDigestOptions wrapped by @options. The
+ * downcast matches the object stored by garrow_tdigest_options_init(). */
+arrow::compute::TDigestOptions *
+garrow_tdigest_options_get_raw(GArrowTDigestOptions *options)
+{
+  auto arrow_function_options =
+    garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options));
+  return static_cast<arrow::compute::TDigestOptions *>(arrow_function_options);
+}
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 37c2fee644..3a2be582a6 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -1704,4 +1704,29 @@ GARROW_AVAILABLE_IN_23_0
GArrowSliceOptions *
garrow_slice_options_new(void);
+#define GARROW_TYPE_TDIGEST_OPTIONS (garrow_tdigest_options_get_type())
+GARROW_AVAILABLE_IN_23_0
+G_DECLARE_DERIVABLE_TYPE(GArrowTDigestOptions,
+ garrow_tdigest_options,
+ GARROW,
+ TDIGEST_OPTIONS,
+ GArrowFunctionOptions) /* parent type: GArrowFunctionOptions */
+struct _GArrowTDigestOptionsClass
+{
+ GArrowFunctionOptionsClass parent_class; /* only member: no extra fields or virtual functions */
+};
+
+/* Constructor and `q` accessors; the API documentation is attached to the
+ * definitions in compute.cpp. */
+GARROW_AVAILABLE_IN_23_0
+GArrowTDigestOptions *
+garrow_tdigest_options_new(void);
+GARROW_AVAILABLE_IN_23_0
+const gdouble *
+garrow_tdigest_options_get_qs(GArrowTDigestOptions *options, gsize *n);
+GARROW_AVAILABLE_IN_23_0
+void
+garrow_tdigest_options_set_q(GArrowTDigestOptions *options, gdouble q);
+GARROW_AVAILABLE_IN_23_0
+void
+garrow_tdigest_options_set_qs(GArrowTDigestOptions *options, const gdouble *qs, gsize n);
+
G_END_DECLS
diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp
index 1b32022092..25924d408f 100644
--- a/c_glib/arrow-glib/compute.hpp
+++ b/c_glib/arrow-glib/compute.hpp
@@ -313,3 +313,8 @@ GArrowSliceOptions *
garrow_slice_options_new_raw(const arrow::compute::SliceOptions
*arrow_options);
arrow::compute::SliceOptions *
garrow_slice_options_get_raw(GArrowSliceOptions *options);
+
+// Conversions between GArrowTDigestOptions and arrow::compute::TDigestOptions.
+GArrowTDigestOptions *
+garrow_tdigest_options_new_raw(const arrow::compute::TDigestOptions *arrow_options);
+arrow::compute::TDigestOptions *
+garrow_tdigest_options_get_raw(GArrowTDigestOptions *options);
diff --git a/c_glib/test/test-tdigest-options.rb b/c_glib/test/test-tdigest-options.rb
new file mode 100644
index 0000000000..117e7ec056
--- /dev/null
+++ b/c_glib/test/test-tdigest-options.rb
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Exercises Arrow::TDigestOptions: the default and updated value of each
+# property, the q/qs accessors, and use of the options with "tdigest".
+class TestTDigestOptions < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    @options = Arrow::TDigestOptions.new
+  end
+
+  # delta starts at 100 and reflects an assigned value.
+  def test_delta
+    assert_equal(100, @options.delta)
+    @options.delta = 200
+    assert_equal(200, @options.delta)
+  end
+
+  # buffer_size starts at 500 and reflects an assigned value.
+  def test_buffer_size
+    assert_equal(500, @options.buffer_size)
+    @options.buffer_size = 1000
+    assert_equal(1000, @options.buffer_size)
+  end
+
+  # skip_nulls starts out true and can be switched off.
+  def test_skip_nulls
+    assert { @options.skip_nulls? }
+    @options.skip_nulls = false
+    assert { !@options.skip_nulls? }
+  end
+
+  # min_count starts at 0 and reflects an assigned value.
+  def test_min_count
+    assert_equal(0, @options.min_count)
+    @options.min_count = 1
+    assert_equal(1, @options.min_count)
+  end
+
+  # qs starts as [0.5]; qs= replaces the list and q= sets a single value.
+  def test_q
+    assert_equal([0.5], @options.qs)
+    @options.qs = [0.1, 0.2, 0.9]
+    assert_equal([0.1, 0.2, 0.9], @options.qs)
+    @options.q = 0.7
+    assert_equal([0.7], @options.qs)
+  end
+
+  # Runs "tdigest" with q = 0.5 over 1.0..5.0 and expects [3.0].
+  def test_tdigest_function
+    values = build_double_array([1.0, 2.0, 3.0, 4.0, 5.0])
+    @options.q = 0.5
+    @options.delta = 200
+    function = Arrow::Function.find("tdigest")
+    result = function.execute([Arrow::ArrayDatum.new(values)], @options).value
+    assert_equal(build_double_array([3.0]), result)
+  end
+end