This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new d5b3b47378 GH-34425: [GLib] Add GArrowRankOptions (#34458)
d5b3b47378 is described below

commit d5b3b4737838774db658d3c488fcd3e72bc13f7e
Author: Sutou Kouhei <[email protected]>
AuthorDate: Mon Mar 6 09:03:49 2023 +0900

    GH-34425: [GLib] Add GArrowRankOptions (#34458)
    
    ### Rationale for this change
    
    It's just missed.
    
    ### What changes are included in this PR?
    
    Just add it.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * Closes: #34425
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 c_glib/arrow-glib/compute.cpp    | 345 ++++++++++++++++++++++++++++++++++-----
 c_glib/arrow-glib/compute.h      |  75 +++++++++
 c_glib/arrow-glib/compute.hpp    |   6 +
 c_glib/test/test-rank-options.rb |  60 +++++++
 4 files changed, 445 insertions(+), 41 deletions(-)

diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 04407a8818..bea56cbb42 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -104,12 +104,54 @@ namespace {
     return true;
   }
 
-  bool
-  garrow_sort_key_equal_raw(const arrow::compute::SortKey &sort_key,
-                            const arrow::compute::SortKey &other_sort_key) {
-    return
-      (sort_key.target == other_sort_key.target) &&
-      (sort_key.order == other_sort_key.order);
+  GList *
+  garrow_sort_keys_new_raw(std::vector<arrow::compute::SortKey> &arrow_sort_keys)
+  {
+    GList *sort_keys = NULL;
+    for (const auto &arrow_sort_key : arrow_sort_keys) {
+      auto sort_key = garrow_sort_key_new_raw(arrow_sort_key);
+      sort_keys = g_list_prepend(sort_keys, sort_key);
+    }
+    return g_list_reverse(sort_keys);
+  }
+
+  gboolean
+  garrow_raw_sort_keys_equal(
+    std::vector<arrow::compute::SortKey> &arrow_sort_keys,
+    std::vector<arrow::compute::SortKey> &arrow_other_sort_keys)
+  {
+    if (arrow_sort_keys.size() != arrow_other_sort_keys.size()) {
+      return FALSE;
+    }
+    const auto n_sort_keys = arrow_sort_keys.size();
+    for (size_t i = 0; i < n_sort_keys; ++i) {
+      auto arrow_sort_key = &(arrow_sort_keys[i]);
+      auto arrow_other_sort_key = &(arrow_other_sort_keys[i]);
+      if (!arrow_sort_key->Equals(*arrow_other_sort_key)) {
+        return FALSE;
+      }
+    }
+    return TRUE;
+  }
+
+  void
+  garrow_raw_sort_keys_set(std::vector<arrow::compute::SortKey> &arrow_sort_keys,
+                           GList *sort_keys)
+  {
+    arrow_sort_keys.clear();
+    for (auto node = sort_keys; node; node = node->next) {
+      auto sort_key = GARROW_SORT_KEY(node->data);
+      auto arrow_sort_key = garrow_sort_key_get_raw(sort_key);
+      arrow_sort_keys.push_back(*arrow_sort_key);
+    }
+  }
+
+  void
+  garrow_raw_sort_keys_add(std::vector<arrow::compute::SortKey> &arrow_sort_keys,
+                           GArrowSortKey *sort_key)
+  {
+    auto arrow_sort_key = garrow_sort_key_get_raw(sort_key);
+    arrow_sort_keys.push_back(*arrow_sort_key);
   }
 }
 
@@ -2928,8 +2970,7 @@ garrow_sort_key_equal(GArrowSortKey *sort_key,
 {
   auto arrow_sort_key = garrow_sort_key_get_raw(sort_key);
   auto arrow_other_sort_key = garrow_sort_key_get_raw(other_sort_key);
-  return garrow_sort_key_equal_raw(*arrow_sort_key,
-                                   *arrow_other_sort_key);
+  return arrow_sort_key->Equals(*arrow_other_sort_key);
 }
 
 
@@ -2985,17 +3026,10 @@ garrow_sort_options_equal(GArrowSortOptions *options,
 {
   auto arrow_options = garrow_sort_options_get_raw(options);
   auto arrow_other_options = garrow_sort_options_get_raw(other_options);
-  if (arrow_options->sort_keys.size() !=
-      arrow_other_options->sort_keys.size()) {
+  if (!garrow_raw_sort_keys_equal(arrow_options->sort_keys,
+                                  arrow_other_options->sort_keys)) {
     return FALSE;
   }
-  const auto n_sort_keys = arrow_options->sort_keys.size();
-  for (size_t i = 0; i < n_sort_keys; ++i) {
-    if (!garrow_sort_key_equal_raw(arrow_options->sort_keys[i],
-                                   arrow_other_options->sort_keys[i])) {
-      return FALSE;
-    }
-  }
   return TRUE;
 }
 
@@ -3012,52 +3046,41 @@ GList *
 garrow_sort_options_get_sort_keys(GArrowSortOptions *options)
 {
   auto arrow_options = garrow_sort_options_get_raw(options);
-  GList *sort_keys = NULL;
-  for (const auto &arrow_sort_key : arrow_options->sort_keys) {
-    auto sort_key = garrow_sort_key_new_raw(arrow_sort_key);
-    sort_keys = g_list_prepend(sort_keys, sort_key);
-  }
-  return g_list_reverse(sort_keys);
+  return garrow_sort_keys_new_raw(arrow_options->sort_keys);
 }
 
 /**
- * garrow_sort_options_add_sort_key:
+ * garrow_sort_options_set_sort_keys:
  * @options: A #GArrowSortOptions.
- * @sort_key: The sort key to be added.
+ * @sort_keys: (element-type GArrowSortKey): The sort keys to be used.
  *
- * Add a sort key to be used.
+ * Set sort keys to be used.
  *
  * Since: 3.0.0
  */
 void
-garrow_sort_options_add_sort_key(GArrowSortOptions *options,
-                                 GArrowSortKey *sort_key)
+garrow_sort_options_set_sort_keys(GArrowSortOptions *options,
+                                  GList *sort_keys)
 {
   auto arrow_options = garrow_sort_options_get_raw(options);
-  auto arrow_sort_key = garrow_sort_key_get_raw(sort_key);
-  arrow_options->sort_keys.push_back(*arrow_sort_key);
+  garrow_raw_sort_keys_set(arrow_options->sort_keys, sort_keys);
 }
 
 /**
- * garrow_sort_options_set_sort_keys:
+ * garrow_sort_options_add_sort_key:
  * @options: A #GArrowSortOptions.
- * @sort_keys: (element-type GArrowSortKey): The sort keys to be used.
+ * @sort_key: The sort key to be added.
  *
- * Set sort keys to be used.
+ * Add a sort key to be used.
  *
  * Since: 3.0.0
  */
 void
-garrow_sort_options_set_sort_keys(GArrowSortOptions *options,
-                                  GList *sort_keys)
+garrow_sort_options_add_sort_key(GArrowSortOptions *options,
+                                 GArrowSortKey *sort_key)
 {
   auto arrow_options = garrow_sort_options_get_raw(options);
-  arrow_options->sort_keys.clear();
-  for (auto node = sort_keys; node; node = node->next) {
-    auto sort_key = GARROW_SORT_KEY(node->data);
-    auto arrow_sort_key = garrow_sort_key_get_raw(sort_key);
-    arrow_options->sort_keys.push_back(*arrow_sort_key);
-  }
+  garrow_raw_sort_keys_add(arrow_options->sort_keys, sort_key);
 }
 
 
@@ -4006,6 +4029,220 @@ garrow_quantile_options_set_qs(GArrowQuantileOptions *options,
 }
 
 
+enum {
+  PROP_RANK_OPTIONS_NULL_PLACEMENT = 1,
+  PROP_RANK_OPTIONS_TIEBREAKER,
+};
+
+G_DEFINE_TYPE(GArrowRankOptions,
+              garrow_rank_options,
+              GARROW_TYPE_FUNCTION_OPTIONS)
+
+#define GARROW_RANK_OPTIONS_GET_PRIVATE(object)          \
+  static_cast<GArrowRankOptionsPrivate *>(               \
+    garrow_rank_options_get_instance_private(            \
+      GARROW_RANK_OPTIONS(object)))
+
+static void
+garrow_rank_options_set_property(GObject *object,
+                                 guint prop_id,
+                                 const GValue *value,
+                                 GParamSpec *pspec)
+{
+  auto options = garrow_rank_options_get_raw(GARROW_RANK_OPTIONS(object));
+
+  switch (prop_id) {
+  case PROP_RANK_OPTIONS_NULL_PLACEMENT:
+    options->null_placement =
+      static_cast<arrow::compute::NullPlacement>(g_value_get_enum(value));
+    break;
+  case PROP_RANK_OPTIONS_TIEBREAKER:
+    options->tiebreaker =
+      static_cast<arrow::compute::RankOptions::Tiebreaker>(
+        g_value_get_enum(value));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_rank_options_get_property(GObject *object,
+                                 guint prop_id,
+                                 GValue *value,
+                                 GParamSpec *pspec)
+{
+  auto options = garrow_rank_options_get_raw(GARROW_RANK_OPTIONS(object));
+
+  switch (prop_id) {
+  case PROP_RANK_OPTIONS_NULL_PLACEMENT:
+    g_value_set_enum(
+      value,
+      static_cast<GArrowNullPlacement>(options->null_placement));
+    break;
+  case PROP_RANK_OPTIONS_TIEBREAKER:
+    g_value_set_enum(
+      value,
+      static_cast<GArrowRankTiebreaker>(options->tiebreaker));
+    break;
+  default:
+    G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+    break;
+  }
+}
+
+static void
+garrow_rank_options_init(GArrowRankOptions *object)
+{
+  auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
+  priv->options = static_cast<arrow::compute::FunctionOptions *>(
+    new arrow::compute::RankOptions());
+}
+
+static void
+garrow_rank_options_class_init(GArrowRankOptionsClass *klass)
+{
+  auto gobject_class = G_OBJECT_CLASS(klass);
+
+  gobject_class->set_property = garrow_rank_options_set_property;
+  gobject_class->get_property = garrow_rank_options_get_property;
+
+
+  auto options = arrow::compute::RankOptions::Defaults();
+
+  GParamSpec *spec;
+  /**
+   * GArrowRankOptions:null-placement:
+   *
+   * Whether nulls and NaNs are placed at the start or at the end.
+   *
+   * Since: 12.0.0
+   */
+  spec = g_param_spec_enum("null-placement",
+                           "Null placement",
+                           "Whether nulls and NaNs are placed "
+                           "at the start or at the end.",
+                           GARROW_TYPE_NULL_PLACEMENT,
+                           static_cast<GArrowNullPlacement>(
+                             options.null_placement),
+                           static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class,
+                                  PROP_RANK_OPTIONS_NULL_PLACEMENT,
+                                  spec);
+
+  /**
+   * GArrowRankOptions:tiebreaker:
+   *
+   * Tiebreaker for dealing with equal values in ranks.
+   *
+   * Since: 12.0.0
+   */
+  spec = g_param_spec_enum("tiebreaker",
+                           "Tiebreaker",
+                           "Tiebreaker for dealing with equal values in ranks.",
+                           GARROW_TYPE_RANK_TIEBREAKER,
+                           static_cast<GArrowRankTiebreaker>(
+                             options.tiebreaker),
+                           static_cast<GParamFlags>(G_PARAM_READWRITE));
+  g_object_class_install_property(gobject_class,
+                                  PROP_RANK_OPTIONS_TIEBREAKER,
+                                  spec);
+}
+
+/**
+ * garrow_rank_options_new:
+ *
+ * Returns: A newly created #GArrowRankOptions.
+ *
+ * Since: 12.0.0
+ */
+GArrowRankOptions *
+garrow_rank_options_new(void)
+{
+  return GARROW_RANK_OPTIONS(g_object_new(GARROW_TYPE_RANK_OPTIONS, nullptr));
+}
+
+/**
+ * garrow_rank_options_equal:
+ * @options: A #GArrowRankOptions.
+ * @other_options: A #GArrowRankOptions to be compared.
+ *
+ * Returns: %TRUE if both of them have the same option values, %FALSE
+ *   otherwise.
+ *
+ * Since: 12.0.0
+ */
+gboolean
+garrow_rank_options_equal(GArrowRankOptions *options,
+                          GArrowRankOptions *other_options)
+{
+  auto arrow_options = garrow_rank_options_get_raw(options);
+  auto arrow_other_options = garrow_rank_options_get_raw(other_options);
+  if (!garrow_raw_sort_keys_equal(arrow_options->sort_keys,
+                                  arrow_other_options->sort_keys)) {
+    return FALSE;
+  }
+  if (arrow_options->null_placement != arrow_other_options->null_placement) {
+    return FALSE;
+  }
+  if (arrow_options->tiebreaker != arrow_other_options->tiebreaker) {
+    return FALSE;
+  }
+  return TRUE;
+}
+
+/**
+ * garrow_rank_options_get_sort_keys:
+ * @options: A #GArrowRankOptions.
+ *
+ * Returns: (transfer full) (element-type GArrowSortKey):
+ *   The sort keys to be used.
+ *
+ * Since: 12.0.0
+ */
+GList *
+garrow_rank_options_get_sort_keys(GArrowRankOptions *options)
+{
+  auto arrow_options = garrow_rank_options_get_raw(options);
+  return garrow_sort_keys_new_raw(arrow_options->sort_keys);
+}
+
+/**
+ * garrow_rank_options_set_sort_keys:
+ * @options: A #GArrowRankOptions.
+ * @sort_keys: (element-type GArrowSortKey): The sort keys to be used.
+ *
+ * Set sort keys to be used.
+ *
+ * Since: 12.0.0
+ */
+void
+garrow_rank_options_set_sort_keys(GArrowRankOptions *options,
+                                  GList *sort_keys)
+{
+  auto arrow_options = garrow_rank_options_get_raw(options);
+  garrow_raw_sort_keys_set(arrow_options->sort_keys, sort_keys);
+}
+
+/**
+ * garrow_rank_options_add_sort_key:
+ * @options: A #GArrowRankOptions.
+ * @sort_key: The sort key to be added.
+ *
+ * Add a sort key to be used.
+ *
+ * Since: 12.0.0
+ */
+void
+garrow_rank_options_add_sort_key(GArrowRankOptions *options,
+                                 GArrowSortKey *sort_key)
+{
+  auto arrow_options = garrow_rank_options_get_raw(options);
+  garrow_raw_sort_keys_add(arrow_options->sort_keys, sort_key);
+}
+
+
 /**
  * garrow_array_cast:
  * @array: A #GArrowArray.
@@ -5279,6 +5516,11 @@ garrow_function_options_new_raw(
       static_cast<const arrow::compute::QuantileOptions *>(arrow_options);
     auto options = garrow_quantile_options_new_raw(arrow_quantile_options);
     return GARROW_FUNCTION_OPTIONS(options);
+  } else if (arrow_type_name == "RankOptions") {
+    const auto arrow_rank_options =
+      static_cast<const arrow::compute::RankOptions *>(arrow_options);
+    auto options = garrow_rank_options_new_raw(arrow_rank_options);
+    return GARROW_FUNCTION_OPTIONS(options);
   } else {
     auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS,
                                 NULL);
@@ -5665,3 +5907,24 @@ garrow_quantile_options_get_raw(GArrowQuantileOptions *options)
   return static_cast<arrow::compute::QuantileOptions *>(
     garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
 }
+
+
+GArrowRankOptions *
+garrow_rank_options_new_raw(const arrow::compute::RankOptions *arrow_options)
+{
+  auto options = GARROW_RANK_OPTIONS(
+    g_object_new(GARROW_TYPE_RANK_OPTIONS,
+                 "null-placement", arrow_options->null_placement,
+                 "tiebreaker", arrow_options->tiebreaker,
+                 nullptr));
+  auto arrow_new_options = garrow_rank_options_get_raw(options);
+  arrow_new_options->sort_keys = arrow_options->sort_keys;
+  return options;
+}
+
+arrow::compute::RankOptions *
+garrow_rank_options_get_raw(GArrowRankOptions *options)
+{
+  return static_cast<arrow::compute::RankOptions *>(
+    garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
+}
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 2b37efec88..cabb4a1482 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -493,6 +493,24 @@ typedef enum {
   GARROW_SORT_ORDER_DESCENDING,
 } GArrowSortOrder;
 
+/**
+ * GArrowNullPlacement:
+ * @GARROW_NULL_PLACEMENT_AT_START:
+ *   Place nulls and NaNs before any non-null values.
+ *   NaNs will come after nulls.
+ * @GARROW_NULL_PLACEMENT_AT_END:
+ *   Place nulls and NaNs after any non-null values.
+ *   NaNs will come before nulls.
+ *
+ * They correspond to `arrow::compute::NullPlacement` values.
+ *
+ * Since: 12.0.0
+ */
+typedef enum /*<prefix=GARROW_NULL_PLACEMENT_>*/ {
+  GARROW_NULL_PLACEMENT_AT_START,
+  GARROW_NULL_PLACEMENT_AT_END,
+} GArrowNullPlacement;
+
 #define GARROW_TYPE_ARRAY_SORT_OPTIONS (garrow_array_sort_options_get_type())
 G_DECLARE_DERIVABLE_TYPE(GArrowArraySortOptions,
                          garrow_array_sort_options,
@@ -766,6 +784,63 @@ garrow_quantile_options_set_qs(GArrowQuantileOptions *options,
                                gsize n);
 
 
+/**
+ * GArrowRankTiebreaker:
+ * @GARROW_RANK_TIEBREAKER_MIN:
+ *   Ties get the smallest possible rank in sorted order.
+ * @GARROW_RANK_TIEBREAKER_MAX:
+ *   Ties get the largest possible rank in sorted order.
+ * @GARROW_RANK_TIEBREAKER_FIRST:
+ *   Ranks are assigned in order of when ties appear in the input.
+ *   This ensures the ranks are a stable permutation of the input.
+ * @GARROW_RANK_TIEBREAKER_DENSE:
+ *   The ranks span a dense [1, M] interval where M is the number
+ *   of distinct values in the input.
+ *
+ * They correspond to the values of
+ * `arrow::compute::RankOptions::Tiebreaker`.
+ *
+ * Since: 12.0.0
+ */
+typedef enum {
+  GARROW_RANK_TIEBREAKER_MIN,
+  GARROW_RANK_TIEBREAKER_MAX,
+  GARROW_RANK_TIEBREAKER_FIRST,
+  GARROW_RANK_TIEBREAKER_DENSE,
+} GArrowRankTiebreaker;
+
+#define GARROW_TYPE_RANK_OPTIONS                \
+  (garrow_rank_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowRankOptions,
+                         garrow_rank_options,
+                         GARROW,
+                         RANK_OPTIONS,
+                         GArrowFunctionOptions)
+struct _GArrowRankOptionsClass
+{
+  GArrowFunctionOptionsClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_12_0
+GArrowRankOptions *
+garrow_rank_options_new(void);
+GARROW_AVAILABLE_IN_12_0
+gboolean
+garrow_rank_options_equal(GArrowRankOptions *options,
+                          GArrowRankOptions *other_options);
+GARROW_AVAILABLE_IN_12_0
+GList *
+garrow_rank_options_get_sort_keys(GArrowRankOptions *options);
+GARROW_AVAILABLE_IN_12_0
+void
+garrow_rank_options_set_sort_keys(GArrowRankOptions *options,
+                                  GList *sort_keys);
+GARROW_AVAILABLE_IN_12_0
+void
+garrow_rank_options_add_sort_key(GArrowRankOptions *options,
+                                 GArrowSortKey *sort_key);
+
+
 GArrowArray *garrow_array_cast(GArrowArray *array,
                                GArrowDataType *target_data_type,
                                GArrowCastOptions *options,
diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp
index f7de4d7cf8..05a1de1a56 100644
--- a/c_glib/arrow-glib/compute.hpp
+++ b/c_glib/arrow-glib/compute.hpp
@@ -162,3 +162,9 @@ garrow_quantile_options_new_raw(
   const arrow::compute::QuantileOptions *arrow_options);
 arrow::compute::QuantileOptions *
 garrow_quantile_options_get_raw(GArrowQuantileOptions *options);
+
+
+GArrowRankOptions *
+garrow_rank_options_new_raw(const arrow::compute::RankOptions *arrow_options);
+arrow::compute::RankOptions *
+garrow_rank_options_get_raw(GArrowRankOptions *options);
diff --git a/c_glib/test/test-rank-options.rb b/c_glib/test/test-rank-options.rb
new file mode 100644
index 0000000000..06806035cd
--- /dev/null
+++ b/c_glib/test/test-rank-options.rb
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestRankOptions < Test::Unit::TestCase
+  include Helper::Buildable
+
+  def setup
+    @options = Arrow::Function.find("rank").default_options
+  end
+
+  def test_equal
+    assert_equal(Arrow::SortOptions.new,
+                 Arrow::SortOptions.new)
+  end
+
+  def test_sort_keys
+    sort_keys = [
+      Arrow::SortKey.new("column1", :ascending),
+      Arrow::SortKey.new("column2", :descending),
+    ]
+    @options.sort_keys = sort_keys
+    assert_equal(sort_keys, @options.sort_keys)
+  end
+
+  def test_add_sort_key
+    @options.add_sort_key(Arrow::SortKey.new("column1", :ascending))
+    @options.add_sort_key(Arrow::SortKey.new("column2", :descending))
+    assert_equal([
+                   Arrow::SortKey.new("column1", :ascending),
+                   Arrow::SortKey.new("column2", :descending),
+                 ],
+                 @options.sort_keys)
+  end
+
+  def test_null_placement
+    assert_equal(Arrow::NullPlacement::AT_END, @options.null_placement)
+    @options.null_placement = :at_start
+    assert_equal(Arrow::NullPlacement::AT_START, @options.null_placement)
+  end
+
+  def test_tiebreaker
+    assert_equal(Arrow::RankTiebreaker::FIRST, @options.tiebreaker)
+    @options.tiebreaker = :max
+    assert_equal(Arrow::RankTiebreaker::MAX, @options.tiebreaker)
+  end
+end

Reply via email to