This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 7b812b4584 GH-48482: [GLib][Ruby] Add GArrowExtractRegexSpanOptions (#48483)
7b812b4584 is described below
commit 7b812b4584c7e4c68654ab24974887230723f270
Author: Sten Larsson <[email protected]>
AuthorDate: Mon Dec 15 06:08:47 2025 +0100
GH-48482: [GLib][Ruby] Add GArrowExtractRegexSpanOptions (#48483)
### Rationale for this change
The `ExtractRegexSpanOptions` class is not available in GLib/Ruby, and it
is used together with the `extract_regex_span` compute function.
### What changes are included in this PR?
This adds the `ExtractRegexSpanOptions` class to GLib.
### Are these changes tested?
Yes, with Ruby unit tests.
### Are there any user-facing changes?
Yes, a new class.
* GitHub Issue: #48482
Authored-by: Sten Larsson <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/arrow-glib/compute.cpp | 123 +++++++++++++++++++++++++
c_glib/arrow-glib/compute.h | 17 ++++
c_glib/arrow-glib/compute.hpp | 6 ++
c_glib/test/test-extract-regex-span-options.rb | 57 ++++++++++++
4 files changed, 203 insertions(+)
diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 831c0e4366..15af5f5013 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -269,6 +269,9 @@ G_BEGIN_DECLS
* #GArrowExtractRegexOptions is a class to customize the `extract_regex`
* function.
*
+ * #GArrowExtractRegexSpanOptions is a class to customize the `extract_regex_span`
+ * function.
+ *
* There are many functions to compute data on an array.
*/
@@ -7091,6 +7094,102 @@ garrow_extract_regex_options_new(void)
return GARROW_EXTRACT_REGEX_OPTIONS(options);
}
+enum {
+ PROP_EXTRACT_REGEX_SPAN_OPTIONS_PATTERN = 1,
+};
+
+G_DEFINE_TYPE(GArrowExtractRegexSpanOptions,
+ garrow_extract_regex_span_options,
+ GARROW_TYPE_FUNCTION_OPTIONS)
+
+static void
+garrow_extract_regex_span_options_set_property(GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ auto options =
+ garrow_extract_regex_span_options_get_raw(GARROW_EXTRACT_REGEX_SPAN_OPTIONS(object));
+
+ switch (prop_id) {
+ case PROP_EXTRACT_REGEX_SPAN_OPTIONS_PATTERN:
+ options->pattern = g_value_get_string(value);
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+ break;
+ }
+}
+
+static void
+garrow_extract_regex_span_options_get_property(GObject *object,
+ guint prop_id,
+ GValue *value,
+ GParamSpec *pspec)
+{
+ auto options =
+ garrow_extract_regex_span_options_get_raw(GARROW_EXTRACT_REGEX_SPAN_OPTIONS(object));
+
+ switch (prop_id) {
+ case PROP_EXTRACT_REGEX_SPAN_OPTIONS_PATTERN:
+ g_value_set_string(value, options->pattern.c_str());
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+ break;
+ }
+}
+
+static void
+garrow_extract_regex_span_options_init(GArrowExtractRegexSpanOptions *object)
+{
+ auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object);
+ priv->options = static_cast<arrow::compute::FunctionOptions *>(
+ new arrow::compute::ExtractRegexSpanOptions());
+}
+
+static void
+garrow_extract_regex_span_options_class_init(GArrowExtractRegexSpanOptionsClass *klass)
+{
+ auto gobject_class = G_OBJECT_CLASS(klass);
+
+ gobject_class->set_property = garrow_extract_regex_span_options_set_property;
+ gobject_class->get_property = garrow_extract_regex_span_options_get_property;
+
+ arrow::compute::ExtractRegexSpanOptions options;
+
+ GParamSpec *spec;
+ /**
+ * GArrowExtractRegexSpanOptions:pattern:
+ *
+ * Regular expression with named capture fields.
+ *
+ * Since: 23.0.0
+ */
+ spec = g_param_spec_string("pattern",
+ "Pattern",
+ "Regular expression with named capture fields",
+ options.pattern.c_str(),
+ static_cast<GParamFlags>(G_PARAM_READWRITE));
+ g_object_class_install_property(gobject_class,
+ PROP_EXTRACT_REGEX_SPAN_OPTIONS_PATTERN,
+ spec);
+}
+
+/**
+ * garrow_extract_regex_span_options_new:
+ *
+ * Returns: A newly created #GArrowExtractRegexSpanOptions.
+ *
+ * Since: 23.0.0
+ */
+GArrowExtractRegexSpanOptions *
+garrow_extract_regex_span_options_new(void)
+{
+ auto options = g_object_new(GARROW_TYPE_EXTRACT_REGEX_SPAN_OPTIONS, NULL);
+ return GARROW_EXTRACT_REGEX_SPAN_OPTIONS(options);
+}
+
G_END_DECLS
arrow::Result<arrow::FieldRef>
@@ -7254,6 +7353,12 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt
static_cast<const arrow::compute::ExtractRegexOptions *>(arrow_options);
auto options = garrow_extract_regex_options_new_raw(arrow_extract_regex_options);
return GARROW_FUNCTION_OPTIONS(options);
+ } else if (arrow_type_name == "ExtractRegexSpanOptions") {
+ const auto arrow_extract_regex_span_options =
+ static_cast<const arrow::compute::ExtractRegexSpanOptions *>(arrow_options);
+ auto options =
+ garrow_extract_regex_span_options_new_raw(arrow_extract_regex_span_options);
+ return GARROW_FUNCTION_OPTIONS(options);
} else {
auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL);
return GARROW_FUNCTION_OPTIONS(options);
@@ -7893,3 +7998,21 @@ garrow_extract_regex_options_get_raw(GArrowExtractRegexOptions *options)
return static_cast<arrow::compute::ExtractRegexOptions *>(
garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
}
+
+GArrowExtractRegexSpanOptions *
+garrow_extract_regex_span_options_new_raw(
+ const arrow::compute::ExtractRegexSpanOptions *arrow_options)
+{
+ return GARROW_EXTRACT_REGEX_SPAN_OPTIONS(
+ g_object_new(GARROW_TYPE_EXTRACT_REGEX_SPAN_OPTIONS,
+ "pattern",
+ arrow_options->pattern.c_str(),
+ NULL));
+}
+
+arrow::compute::ExtractRegexSpanOptions *
+garrow_extract_regex_span_options_get_raw(GArrowExtractRegexSpanOptions *options)
+{
+ return static_cast<arrow::compute::ExtractRegexSpanOptions *>(
+ garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options)));
+}
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 17deb2c4b3..7e4f92cff0 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -1272,4 +1272,21 @@ GARROW_AVAILABLE_IN_23_0
GArrowExtractRegexOptions *
garrow_extract_regex_options_new(void);
+#define GARROW_TYPE_EXTRACT_REGEX_SPAN_OPTIONS \
+ (garrow_extract_regex_span_options_get_type())
+GARROW_AVAILABLE_IN_23_0
+G_DECLARE_DERIVABLE_TYPE(GArrowExtractRegexSpanOptions,
+ garrow_extract_regex_span_options,
+ GARROW,
+ EXTRACT_REGEX_SPAN_OPTIONS,
+ GArrowFunctionOptions)
+struct _GArrowExtractRegexSpanOptionsClass
+{
+ GArrowFunctionOptionsClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_23_0
+GArrowExtractRegexSpanOptions *
+garrow_extract_regex_span_options_new(void);
+
G_END_DECLS
diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp
index de3ac3e4f9..4fe1a6e02f 100644
--- a/c_glib/arrow-glib/compute.hpp
+++ b/c_glib/arrow-glib/compute.hpp
@@ -209,3 +209,9 @@ garrow_extract_regex_options_new_raw(
const arrow::compute::ExtractRegexOptions *arrow_options);
arrow::compute::ExtractRegexOptions *
garrow_extract_regex_options_get_raw(GArrowExtractRegexOptions *options);
+
+GArrowExtractRegexSpanOptions *
+garrow_extract_regex_span_options_new_raw(
+ const arrow::compute::ExtractRegexSpanOptions *arrow_options);
+arrow::compute::ExtractRegexSpanOptions *
+garrow_extract_regex_span_options_get_raw(GArrowExtractRegexSpanOptions *options);
diff --git a/c_glib/test/test-extract-regex-span-options.rb b/c_glib/test/test-extract-regex-span-options.rb
new file mode 100644
index 0000000000..651c5d962c
--- /dev/null
+++ b/c_glib/test/test-extract-regex-span-options.rb
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestExtractRegexSpanOptions < Test::Unit::TestCase
+ include Helper::Buildable
+
+ def setup
+ @options = Arrow::ExtractRegexSpanOptions.new
+ end
+
+ def test_pattern_property
+ assert_equal("", @options.pattern)
+ @options.pattern = "(?P<year>\\d{4})-(?P<month>\\d{2})"
+ assert_equal("(?P<year>\\d{4})-(?P<month>\\d{2})", @options.pattern)
+ end
+
+ def test_extract_regex_span_function
+ args = [
+ Arrow::ArrayDatum.new(build_string_array(["2023-01-15", "2024-12-31"])),
+ ]
+ @options.pattern = "(?P<year>\\d{4})-(?P<month>\\d{2})-(?P<day>\\d{2})"
+ extract_regex_span_function = Arrow::Function.find("extract_regex_span")
+ result = extract_regex_span_function.execute(args, @options).value
+ fields = [
+ Arrow::Field.new("year", Arrow::FixedSizeListDataType.new(Arrow::Int32DataType.new, 2)),
+ Arrow::Field.new("month", Arrow::FixedSizeListDataType.new(Arrow::Int32DataType.new, 2)),
+ Arrow::Field.new("day", Arrow::FixedSizeListDataType.new(Arrow::Int32DataType.new, 2)),
+ ]
+ assert_equal(build_struct_array(fields, [
+ {
+ "year" => [0, 4],
+ "month" => [5, 2],
+ "day" => [8, 2],
+ },
+ {
+ "year" => [0, 4],
+ "month" => [5, 2],
+ "day" => [8, 2],
+ },
+ ]),
+ result)
+ end
+end