This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d5b3b47378 GH-34425: [GLib] Add GArrowRankOptions (#34458)
d5b3b47378 is described below
commit d5b3b4737838774db658d3c488fcd3e72bc13f7e
Author: Sutou Kouhei <[email protected]>
AuthorDate: Mon Mar 6 09:03:49 2023 +0900
GH-34425: [GLib] Add GArrowRankOptions (#34458)
### Rationale for this change
It's just missed.
### What changes are included in this PR?
Just add it.
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* Closes: #34425
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/arrow-glib/compute.cpp | 345 ++++++++++++++++++++++++++++++++++-----
c_glib/arrow-glib/compute.h | 75 +++++++++
c_glib/arrow-glib/compute.hpp | 6 +
c_glib/test/test-rank-options.rb | 60 +++++++
4 files changed, 445 insertions(+), 41 deletions(-)
diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 04407a8818..bea56cbb42 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -104,12 +104,54 @@ namespace {
return true;
}
- bool
- garrow_sort_key_equal_raw(const arrow::compute::SortKey &sort_key,
- const arrow::compute::SortKey &other_sort_key) {
- return
- (sort_key.target == other_sort_key.target) &&
- (sort_key.order == other_sort_key.order);
+ GList *
+ garrow_sort_keys_new_raw(std::vector<arrow::compute::SortKey> &arrow_sort_keys)
+ {
+ GList *sort_keys = NULL;
+ for (const auto &arrow_sort_key : arrow_sort_keys) {
+ auto sort_key = garrow_sort_key_new_raw(arrow_sort_key);
+ sort_keys = g_list_prepend(sort_keys, sort_key);
+ }
+ return g_list_reverse(sort_keys);
+ }
+
+ gboolean
+ garrow_raw_sort_keys_equal(
+ std::vector<arrow::compute::SortKey> &arrow_sort_keys,
+ std::vector<arrow::compute::SortKey> &arrow_other_sort_keys)
+ {
+ if (arrow_sort_keys.size() != arrow_other_sort_keys.size()) {
+ return FALSE;
+ }
+ const auto n_sort_keys = arrow_sort_keys.size();
+ for (size_t i = 0; i < n_sort_keys; ++i) {
+ auto arrow_sort_key = &(arrow_sort_keys[i]);
+ auto arrow_other_sort_key = &(arrow_other_sort_keys[i]);
+ if (!arrow_sort_key->Equals(*arrow_other_sort_key)) {
+ return FALSE;
+ }
+ }
+ return TRUE;
+ }
+
+ void
+ garrow_raw_sort_keys_set(std::vector<arrow::compute::SortKey> &arrow_sort_keys,
+ GList *sort_keys)
+ {
+ arrow_sort_keys.clear();
+ for (auto node = sort_keys; node; node = node->next) {
+ auto sort_key = GARROW_SORT_KEY(node->data);
+ auto arrow_sort_key = garrow_sort_key_get_raw(sort_key);
+ arrow_sort_keys.push_back(*arrow_sort_key);
+ }
+ }
+
+ void
+ garrow_raw_sort_keys_add(std::vector<arrow::compute::SortKey> &arrow_sort_keys,
+ GArrowSortKey *sort_key)
+ {
+ auto arrow_sort_key = garrow_sort_key_get_raw(sort_key);
+ arrow_sort_keys.push_back(*arrow_sort_key);
}
}
@@ -2928,8 +2970,7 @@ garrow_sort_key_equal(GArrowSortKey *sort_key,
{
auto arrow_sort_key = garrow_sort_key_get_raw(sort_key);
auto arrow_other_sort_key = garrow_sort_key_get_raw(other_sort_key);
- return garrow_sort_key_equal_raw(*arrow_sort_key,
- *arrow_other_sort_key);
+ return arrow_sort_key->Equals(*arrow_other_sort_key);
}
@@ -2985,17 +3026,10 @@ garrow_sort_options_equal(GArrowSortOptions *options,
{
auto arrow_options = garrow_sort_options_get_raw(options);
auto arrow_other_options = garrow_sort_options_get_raw(other_options);
- if (arrow_options->sort_keys.size() !=
- arrow_other_options->sort_keys.size()) {
+ if (!garrow_raw_sort_keys_equal(arrow_options->sort_keys,
+ arrow_other_options->sort_keys)) {
return FALSE;
}
- const auto n_sort_keys = arrow_options->sort_keys.size();
- for (size_t i = 0; i < n_sort_keys; ++i) {
- if (!garrow_sort_key_equal_raw(arrow_options->sort_keys[i],
- arrow_other_options->sort_keys[i])) {
- return FALSE;
- }
- }
return TRUE;
}
@@ -3012,52 +3046,41 @@ GList *
garrow_sort_options_get_sort_keys(GArrowSortOptions *options)
{
auto arrow_options = garrow_sort_options_get_raw(options);
- GList *sort_keys = NULL;
- for (const auto &arrow_sort_key : arrow_options->sort_keys) {
- auto sort_key = garrow_sort_key_new_raw(arrow_sort_key);
- sort_keys = g_list_prepend(sort_keys, sort_key);
- }
- return g_list_reverse(sort_keys);
+ return garrow_sort_keys_new_raw(arrow_options->sort_keys);
}
/**
- * garrow_sort_options_add_sort_key:
+ * garrow_sort_options_set_sort_keys:
* @options: A #GArrowSortOptions.
- * @sort_key: The sort key to be added.
+ * @sort_keys: (element-type GArrowSortKey): The sort keys to be used.
*
- * Add a sort key to be used.
+ * Set sort keys to be used.
*
* Since: 3.0.0
*/
void
-garrow_sort_options_add_sort_key(GArrowSortOptions *options,
- GArrowSortKey *sort_key)
+garrow_sort_options_set_sort_keys(GArrowSortOptions *options,
+ GList *sort_keys)
{
auto arrow_options = garrow_sort_options_get_raw(options);
- auto arrow_sort_key = garrow_sort_key_get_raw(sort_key);
- arrow_options->sort_keys.push_back(*arrow_sort_key);
+ garrow_raw_sort_keys_set(arrow_options->sort_keys, sort_keys);
}
/**
- * garrow_sort_options_set_sort_keys:
+ * garrow_sort_options_add_sort_key:
* @options: A #GArrowSortOptions.
- * @sort_keys: (element-type GArrowSortKey): The sort keys to be used.
+ * @sort_key: The sort key to be added.
*
- * Set sort keys to be used.
+ * Add a sort key to be used.
*
* Since: 3.0.0
*/
void
-garrow_sort_options_set_sort_keys(GArrowSortOptions *options,
- GList *sort_keys)
+garrow_sort_options_add_sort_key(GArrowSortOptions *options,
+ GArrowSortKey *sort_key)
{
auto arrow_options = garrow_sort_options_get_raw(options);
- arrow_options->sort_keys.clear();
- for (auto node = sort_keys; node; node = node->next) {
- auto sort_key = GARROW_SORT_KEY(node->data);
- auto arrow_sort_key = garrow_sort_key_get_raw(sort_key);
- arrow_options->sort_keys.push_back(*arrow_sort_key);
- }
+ garrow_raw_sort_keys_add(arrow_options->sort_keys, sort_key);
}
@@ -4006,6 +4029,220 @@ garrow_quantile_options_set_qs(GArrowQuantileOptions *options,
}
+enum {
+ PROP_RANK_OPTIONS_NULL_PLACEMENT = 1,
+ PROP_RANK_OPTIONS_TIEBREAKER,
+};
+
+G_DEFINE_TYPE(GArrowRankOptions,
+ garrow_rank_options,
+ GARROW_TYPE_FUNCTION_OPTIONS)
+
+#define GARROW_RANK_OPTIONS_GET_PRIVATE(object) \
+ static_cast<GArrowRankOptionsPrivate *>( \
+ garrow_rank_options_get_instance_private( \
+ GARROW_RANK_OPTIONS(object)))
+
+static void
+garrow_rank_options_set_property(GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ auto options = garrow_rank_options_get_raw(GARROW_RANK_OPTIONS(object));
+
+ switch (prop_id) {
+ case PROP_RANK_OPTIONS_NULL_PLACEMENT:
+ options->null_placement =
+ static_cast<arrow::compute::NullPlacement>(g_value_get_enum(value));
+ break;
+ case PROP_RANK_OPTIONS_TIEBREAKER:
+ options->tiebreaker =
+ static_cast<arrow::compute::RankOptions::Tiebreaker>(
+ g_value_get_enum(value));
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+ break;
+ }
+}
+
+static void
+garrow_rank_options_get_property(GObject *object,
+ guint prop_id,
+ GValue *value,
+ GParamSpec *pspec)
+{
+ auto options = garrow_rank_options_get_raw(GARROW_RANK_OPTIONS(object));
+
+ switch (prop_id) {
+ case PROP_RANK_OPTIONS_NULL_PLACEMENT:
+ g_value_set_enum(
+ value,
+ static_cast<GArrowNullPlacement>(options->null_placement));
+ break;
+ case PROP_RANK_OPTIONS_TIEBREAKER:
+ g_value_set_enum(
+ value,
+ static_cast<GArrowRankTiebreaker>(options->tiebreaker));
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+ break;
+ }
+}
+
+static void
+garrow_rank_options_init(GArrowRankOptions *object)
+{
+ auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
+ priv->options = static_cast<arrow::compute::FunctionOptions *>(
+ new arrow::compute::RankOptions());
+}
+
+static void
+garrow_rank_options_class_init(GArrowRankOptionsClass *klass)
+{
+ auto gobject_class = G_OBJECT_CLASS(klass);
+
+ gobject_class->set_property = garrow_rank_options_set_property;
+ gobject_class->get_property = garrow_rank_options_get_property;
+
+
+ auto options = arrow::compute::RankOptions::Defaults();
+
+ GParamSpec *spec;
+ /**
+ * GArrowRankOptions:null-placement:
+ *
+ * Whether nulls and NaNs are placed at the start or at the end.
+ *
+ * Since: 12.0.0
+ */
+ spec = g_param_spec_enum("null-placement",
+ "Null placement",
+ "Whether nulls and NaNs are placed "
+ "at the start or at the end.",
+ GARROW_TYPE_NULL_PLACEMENT,
+ static_cast<GArrowNullPlacement>(
+ options.null_placement),
+ static_cast<GParamFlags>(G_PARAM_READWRITE));
+ g_object_class_install_property(gobject_class,
+ PROP_RANK_OPTIONS_NULL_PLACEMENT,
+ spec);
+
+ /**
+ * GArrowRankOptions:tiebreaker:
+ *
+ * Tiebreaker for dealing with equal values in ranks.
+ *
+ * Since: 12.0.0
+ */
+ spec = g_param_spec_enum("tiebreaker",
+ "Tiebreaker",
+ "Tiebreaker for dealing with equal values in ranks.",
+ GARROW_TYPE_RANK_TIEBREAKER,
+ static_cast<GArrowRankTiebreaker>(
+ options.tiebreaker),
+ static_cast<GParamFlags>(G_PARAM_READWRITE));
+ g_object_class_install_property(gobject_class,
+ PROP_RANK_OPTIONS_TIEBREAKER,
+ spec);
+}
+
+/**
+ * garrow_rank_options_new:
+ *
+ * Returns: A newly created #GArrowRankOptions.
+ *
+ * Since: 12.0.0
+ */
+GArrowRankOptions *
+garrow_rank_options_new(void)
+{
+ return GARROW_RANK_OPTIONS(g_object_new(GARROW_TYPE_RANK_OPTIONS, nullptr));
+}
+
+/**
+ * garrow_rank_options_equal:
+ * @options: A #GArrowRankOptions.
+ * @other_options: A #GArrowRankOptions to be compared.
+ *
+ * Returns: %TRUE if both of them have the same option values, %FALSE
+ * otherwise.
+ *
+ * Since: 12.0.0
+ */
+gboolean
+garrow_rank_options_equal(GArrowRankOptions *options,
+ GArrowRankOptions *other_options)
+{
+ auto arrow_options = garrow_rank_options_get_raw(options);
+ auto arrow_other_options = garrow_rank_options_get_raw(other_options);
+ if (!garrow_raw_sort_keys_equal(arrow_options->sort_keys,
+ arrow_other_options->sort_keys)) {
+ return FALSE;
+ }
+ if (arrow_options->null_placement != arrow_other_options->null_placement) {
+ return FALSE;
+ }
+ if (arrow_options->tiebreaker != arrow_other_options->tiebreaker) {
+ return FALSE;
+ }
+ return TRUE;
+}
+
+/**
+ * garrow_rank_options_get_sort_keys:
+ * @options: A #GArrowRankOptions.
+ *
+ * Returns: (transfer full) (element-type GArrowSortKey):
+ * The sort keys to be used.
+ *
+ * Since: 12.0.0
+ */
+GList *
+garrow_rank_options_get_sort_keys(GArrowRankOptions *options)
+{
+ auto arrow_options = garrow_rank_options_get_raw(options);
+ return garrow_sort_keys_new_raw(arrow_options->sort_keys);
+}
+
+/**
+ * garrow_rank_options_set_sort_keys:
+ * @options: A #GArrowRankOptions.
+ * @sort_keys: (element-type GArrowSortKey): The sort keys to be used.
+ *
+ * Set sort keys to be used.
+ *
+ * Since: 12.0.0
+ */
+void
+garrow_rank_options_set_sort_keys(GArrowRankOptions *options,
+ GList *sort_keys)
+{
+ auto arrow_options = garrow_rank_options_get_raw(options);
+ garrow_raw_sort_keys_set(arrow_options->sort_keys, sort_keys);
+}
+
+/**
+ * garrow_rank_options_add_sort_key:
+ * @options: A #GArrowRankOptions.
+ * @sort_key: The sort key to be added.
+ *
+ * Add a sort key to be used.
+ *
+ * Since: 12.0.0
+ */
+void
+garrow_rank_options_add_sort_key(GArrowRankOptions *options,
+ GArrowSortKey *sort_key)
+{
+ auto arrow_options = garrow_rank_options_get_raw(options);
+ garrow_raw_sort_keys_add(arrow_options->sort_keys, sort_key);
+}
+
+
/**
* garrow_array_cast:
* @array: A #GArrowArray.
@@ -5279,6 +5516,11 @@ garrow_function_options_new_raw(
static_cast<const arrow::compute::QuantileOptions *>(arrow_options);
auto options = garrow_quantile_options_new_raw(arrow_quantile_options);
return GARROW_FUNCTION_OPTIONS(options);
+ } else if (arrow_type_name == "RankOptions") {
+ const auto arrow_rank_options =
+ static_cast<const arrow::compute::RankOptions *>(arrow_options);
+ auto options = garrow_rank_options_new_raw(arrow_rank_options);
+ return GARROW_FUNCTION_OPTIONS(options);
} else {
auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS,
NULL);
@@ -5665,3 +5907,24 @@ garrow_quantile_options_get_raw(GArrowQuantileOptions *options)
return static_cast<arrow::compute::QuantileOptions *>(
garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
}
+
+
+GArrowRankOptions *
+garrow_rank_options_new_raw(const arrow::compute::RankOptions *arrow_options)
+{
+ auto options = GARROW_RANK_OPTIONS(
+ g_object_new(GARROW_TYPE_RANK_OPTIONS, nullptr));
+ auto arrow_new_options = garrow_rank_options_get_raw(options);
+ /* Copy fields directly; C++ enums must not go through g_object_new() varargs. */
+ arrow_new_options->sort_keys = arrow_options->sort_keys;
+ arrow_new_options->null_placement = arrow_options->null_placement;
+ arrow_new_options->tiebreaker = arrow_options->tiebreaker;
+ return options;
+}
+
+arrow::compute::RankOptions *
+garrow_rank_options_get_raw(GArrowRankOptions *options)
+{
+ return static_cast<arrow::compute::RankOptions *>(
+ garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
+}
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 2b37efec88..cabb4a1482 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -493,6 +493,24 @@ typedef enum {
GARROW_SORT_ORDER_DESCENDING,
} GArrowSortOrder;
+/**
+ * GArrowNullPlacement:
+ * @GARROW_NULL_PLACEMENT_AT_START:
+ * Place nulls and NaNs before any non-null values.
+ * NaNs will come after nulls.
+ * @GARROW_NULL_PLACEMENT_AT_END:
+ * Place nulls and NaNs after any non-null values.
+ * NaNs will come before nulls.
+ *
+ * They correspond to `arrow::compute::NullPlacement` values.
+ *
+ * Since: 12.0.0
+ */
+typedef enum /*<prefix=GARROW_NULL_PLACEMENT_>*/ {
+ GARROW_NULL_PLACEMENT_AT_START,
+ GARROW_NULL_PLACEMENT_AT_END,
+} GArrowNullPlacement;
+
#define GARROW_TYPE_ARRAY_SORT_OPTIONS (garrow_array_sort_options_get_type())
G_DECLARE_DERIVABLE_TYPE(GArrowArraySortOptions,
garrow_array_sort_options,
@@ -766,6 +784,63 @@ garrow_quantile_options_set_qs(GArrowQuantileOptions *options,
gsize n);
+/**
+ * GArrowRankTiebreaker:
+ * @GARROW_RANK_TIEBREAKER_MIN:
+ * Ties get the smallest possible rank in sorted order.
+ * @GARROW_RANK_TIEBREAKER_MAX:
+ * Ties get the largest possible rank in sorted order.
+ * @GARROW_RANK_TIEBREAKER_FIRST:
+ * Ranks are assigned in order of when ties appear in the input.
+ * This ensures the ranks are a stable permutation of the input.
+ * @GARROW_RANK_TIEBREAKER_DENSE:
+ * The ranks span a dense [1, M] interval where M is the number
+ * of distinct values in the input.
+ *
+ * They correspond to the values of
+ * `arrow::compute::RankOptions::Tiebreaker`.
+ *
+ * Since: 12.0.0
+ */
+typedef enum {
+ GARROW_RANK_TIEBREAKER_MIN,
+ GARROW_RANK_TIEBREAKER_MAX,
+ GARROW_RANK_TIEBREAKER_FIRST,
+ GARROW_RANK_TIEBREAKER_DENSE,
+} GArrowRankTiebreaker;
+
+#define GARROW_TYPE_RANK_OPTIONS \
+ (garrow_rank_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowRankOptions,
+ garrow_rank_options,
+ GARROW,
+ RANK_OPTIONS,
+ GArrowFunctionOptions)
+struct _GArrowRankOptionsClass
+{
+ GArrowFunctionOptionsClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_12_0
+GArrowRankOptions *
+garrow_rank_options_new(void);
+GARROW_AVAILABLE_IN_12_0
+gboolean
+garrow_rank_options_equal(GArrowRankOptions *options,
+ GArrowRankOptions *other_options);
+GARROW_AVAILABLE_IN_12_0
+GList *
+garrow_rank_options_get_sort_keys(GArrowRankOptions *options);
+GARROW_AVAILABLE_IN_12_0
+void
+garrow_rank_options_set_sort_keys(GArrowRankOptions *options,
+ GList *sort_keys);
+GARROW_AVAILABLE_IN_12_0
+void
+garrow_rank_options_add_sort_key(GArrowRankOptions *options,
+ GArrowSortKey *sort_key);
+
+
GArrowArray *garrow_array_cast(GArrowArray *array,
GArrowDataType *target_data_type,
GArrowCastOptions *options,
diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp
index f7de4d7cf8..05a1de1a56 100644
--- a/c_glib/arrow-glib/compute.hpp
+++ b/c_glib/arrow-glib/compute.hpp
@@ -162,3 +162,9 @@ garrow_quantile_options_new_raw(
const arrow::compute::QuantileOptions *arrow_options);
arrow::compute::QuantileOptions *
garrow_quantile_options_get_raw(GArrowQuantileOptions *options);
+
+
+GArrowRankOptions *
+garrow_rank_options_new_raw(const arrow::compute::RankOptions *arrow_options);
+arrow::compute::RankOptions *
+garrow_rank_options_get_raw(GArrowRankOptions *options);
diff --git a/c_glib/test/test-rank-options.rb b/c_glib/test/test-rank-options.rb
new file mode 100644
index 0000000000..06806035cd
--- /dev/null
+++ b/c_glib/test/test-rank-options.rb
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestRankOptions < Test::Unit::TestCase
+ include Helper::Buildable
+
+ def setup
+ @options = Arrow::Function.find("rank").default_options
+ end
+
+ def test_equal
+ assert_equal(Arrow::SortOptions.new,
+ Arrow::SortOptions.new)
+ end
+
+ def test_sort_keys
+ sort_keys = [
+ Arrow::SortKey.new("column1", :ascending),
+ Arrow::SortKey.new("column2", :descending),
+ ]
+ @options.sort_keys = sort_keys
+ assert_equal(sort_keys, @options.sort_keys)
+ end
+
+ def test_add_sort_key
+ @options.add_sort_key(Arrow::SortKey.new("column1", :ascending))
+ @options.add_sort_key(Arrow::SortKey.new("column2", :descending))
+ assert_equal([
+ Arrow::SortKey.new("column1", :ascending),
+ Arrow::SortKey.new("column2", :descending),
+ ],
+ @options.sort_keys)
+ end
+
+ def test_null_placement
+ assert_equal(Arrow::NullPlacement::AT_END, @options.null_placement)
+ @options.null_placement = :at_start
+ assert_equal(Arrow::NullPlacement::AT_START, @options.null_placement)
+ end
+
+ def test_tiebreaker
+ assert_equal(Arrow::RankTiebreaker::FIRST, @options.tiebreaker)
+ @options.tiebreaker = :max
+ assert_equal(Arrow::RankTiebreaker::MAX, @options.tiebreaker)
+ end
+end