This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 52084a9 ARROW-9294: [GLib] Add GArrowFunction and related objects
52084a9 is described below
commit 52084a9e24c0acab91910a082dec7d64e2ee3a18
Author: Sutou Kouhei <[email protected]>
AuthorDate: Fri Jul 3 09:18:09 2020 +0900
ARROW-9294: [GLib] Add GArrowFunction and related objects
Closes #7615 from kou/glib-function
Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
c_glib/arrow-glib/basic-array.cpp | 2 +-
c_glib/arrow-glib/compute.cpp | 475 +++++++++++++++++++++++++++++++++++--
c_glib/arrow-glib/compute.h | 53 ++++-
c_glib/arrow-glib/compute.hpp | 20 ++
c_glib/arrow-glib/datum.cpp | 85 +++++--
c_glib/arrow-glib/datum.hpp | 12 +-
c_glib/arrow-glib/file.cpp | 4 +-
c_glib/arrow-glib/input-stream.cpp | 2 +-
c_glib/arrow-glib/reader.cpp | 2 +-
c_glib/test/helper/buildable.rb | 13 +
c_glib/test/test-cast.rb | 58 ++++-
c_glib/test/test-function.rb | 64 +++++
12 files changed, 728 insertions(+), 62 deletions(-)
diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp
index 4fa86d2..a22db92 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -329,7 +329,7 @@ garrow_array_class_init(GArrowArrayClass *klass)
spec = g_param_spec_pointer("array",
"Array",
"The raw std::shared<arrow::Array> *",
- static_cast<GParamFlags>(G_PARAM_READWRITE |
+ static_cast<GParamFlags>(G_PARAM_WRITABLE |
G_PARAM_CONSTRUCT_ONLY));
g_object_class_install_property(gobject_class, PROP_ARRAY, spec);
diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 581a4c6..9069465 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -27,6 +27,7 @@
#include <arrow-glib/compute.hpp>
#include <arrow-glib/chunked-array.hpp>
#include <arrow-glib/data-type.hpp>
+#include <arrow-glib/datum.hpp>
#include <arrow-glib/enums.h>
#include <arrow-glib/error.hpp>
#include <arrow-glib/record-batch.hpp>
@@ -84,27 +85,274 @@ G_BEGIN_DECLS
* @title: Computation on array
* @include: arrow-glib/arrow-glib.h
*
- * #GArrowCastOptions is a class to customize garrow_array_cast().
+ * #GArrowExecuteContext is a class to customize how to execute a
+ * function.
*
- * #GArrowCountOptions is a class to customize garrow_array_count().
+ * #GArrowFunctionOptions is an interface for function options. All
+ * function options such as #GArrowCastOptions must implement this
+ * interface.
+ *
+ * #GArrowFunction is a class to process data.
+ *
+ * #GArrowCastOptions is a class to customize the `cast` function and
+ * garrow_array_cast().
+ *
+ * #GArrowCountOptions is a class to customize the `count` function and
+ * garrow_array_count().
+ *
+ * #GArrowFilterOptions is a class to customize the `filter` function and
+ * garrow_array_filter() family.
+ *
+ * #GArrowTakeOptions is a class to customize the `take` function and
+ * garrow_array_take() family.
+ *
+ * #GArrowCompareOptions is a class to customize the `equal` function
+ * family and garrow_int8_array_compare() family.
*
* There are many functions to compute data on an array.
*/
+typedef struct GArrowExecuteContextPrivate_ {
+ arrow::compute::ExecContext context;
+} GArrowExecuteContextPrivate;
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowExecuteContext,
+ garrow_execute_context,
+ G_TYPE_OBJECT)
+
+#define GARROW_EXECUTE_CONTEXT_GET_PRIVATE(object) \
+ static_cast<GArrowExecuteContextPrivate *>( \
+ garrow_execute_context_get_instance_private( \
+ GARROW_EXECUTE_CONTEXT(object)))
+
+static void
+garrow_execute_context_finalize(GObject *object)
+{
+ auto priv = GARROW_EXECUTE_CONTEXT_GET_PRIVATE(object);
+ priv->context.~ExecContext();
+ G_OBJECT_CLASS(garrow_execute_context_parent_class)->finalize(object);
+}
+
+static void
+garrow_execute_context_init(GArrowExecuteContext *object)
+{
+ auto priv = GARROW_EXECUTE_CONTEXT_GET_PRIVATE(object);
+ new(&priv->context) arrow::compute::ExecContext(arrow::default_memory_pool(),
+ nullptr);
+}
+
+static void
+garrow_execute_context_class_init(GArrowExecuteContextClass *klass)
+{
+ auto gobject_class = G_OBJECT_CLASS(klass);
+
+ gobject_class->finalize = garrow_execute_context_finalize;
+}
+
+/**
+ * garrow_execute_context_new:
+ *
+ * Returns: A newly created #GArrowExecuteContext.
+ *
+ * Since: 1.0.0
+ */
+GArrowExecuteContext *
+garrow_execute_context_new(void)
+{
+ auto execute_context = g_object_new(GARROW_TYPE_EXECUTE_CONTEXT, NULL);
+ return GARROW_EXECUTE_CONTEXT(execute_context);
+}
+
+
+G_DEFINE_INTERFACE(GArrowFunctionOptions,
+ garrow_function_options,
+ G_TYPE_INVALID)
+
+static void
+garrow_function_options_default_init(GArrowFunctionOptionsInterface *iface)
+{
+}
+
+
+typedef struct GArrowFunctionPrivate_ {
+ std::shared_ptr<arrow::compute::Function> function;
+} GArrowFunctionPrivate;
+
+enum {
+ PROP_FUNCTION = 1,
+};
+
+G_DEFINE_TYPE_WITH_PRIVATE(GArrowFunction,
+ garrow_function,
+ G_TYPE_OBJECT)
+
+#define GARROW_FUNCTION_GET_PRIVATE(object) \
+ static_cast<GArrowFunctionPrivate *>( \
+ garrow_function_get_instance_private( \
+ GARROW_FUNCTION(object)))
+
+static void
+garrow_function_finalize(GObject *object)
+{
+ auto priv = GARROW_FUNCTION_GET_PRIVATE(object);
+ priv->function.~shared_ptr();
+ G_OBJECT_CLASS(garrow_function_parent_class)->finalize(object);
+}
+
+static void
+garrow_function_set_property(GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ auto priv = GARROW_FUNCTION_GET_PRIVATE(object);
+
+ switch (prop_id) {
+ case PROP_FUNCTION:
+ priv->function =
+ *static_cast<std::shared_ptr<arrow::compute::Function> *>(
+ g_value_get_pointer(value));
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
+ break;
+ }
+}
+
+static void
+garrow_function_init(GArrowFunction *object)
+{
+ auto priv = GARROW_FUNCTION_GET_PRIVATE(object);
+ new(&priv->function) std::shared_ptr<arrow::compute::Function>;
+}
+
+static void
+garrow_function_class_init(GArrowFunctionClass *klass)
+{
+ auto gobject_class = G_OBJECT_CLASS(klass);
+
+ gobject_class->finalize = garrow_function_finalize;
+ gobject_class->set_property = garrow_function_set_property;
+
+ GParamSpec *spec;
+ spec = g_param_spec_pointer("function",
+ "Function",
+ "The raw std::shared<arrow::compute::Function>
*",
+ static_cast<GParamFlags>(G_PARAM_WRITABLE |
+
G_PARAM_CONSTRUCT_ONLY));
+ g_object_class_install_property(gobject_class, PROP_FUNCTION, spec);
+}
+
+/**
+ * garrow_function_find:
+ * @name: A function name to be found.
+ *
+ * Returns: (transfer full):
+ * The found #GArrowFunction or %NULL if not found.
+ *
+ * Since: 1.0.0
+ */
+GArrowFunction *
+garrow_function_find(const gchar *name)
+{
+ auto arrow_function_registry = arrow::compute::GetFunctionRegistry();
+ auto arrow_function_result = arrow_function_registry->GetFunction(name);
+ if (!arrow_function_result.ok()) {
+ return NULL;
+ }
+ auto arrow_function = *arrow_function_result;
+ return garrow_function_new_raw(&arrow_function);
+}
+
+/**
+ * garrow_function_execute:
+ * @function: A #GArrowFunction.
+ * @args: (element-type GArrowDatum): A list of #GArrowDatum.
+ * @options: (nullable): Options for the execution as an object that
+ * implements #GArrowFunctionOptions.
+ * @context: (nullable): A #GArrowExecuteContext for the execution.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full):
+ * A return value of the execution as #GArrowDatum on success, %NULL on error.
+ *
+ * Since: 1.0.0
+ */
+GArrowDatum *
+garrow_function_execute(GArrowFunction *function,
+ GList *args,
+ GArrowFunctionOptions *options,
+ GArrowExecuteContext *context,
+ GError **error)
+{
+ auto arrow_function = garrow_function_get_raw(function);
+ std::vector<arrow::Datum> arrow_args;
+ for (GList *node = args; node; node = node->next) {
+ GArrowDatum *datum = GARROW_DATUM(node->data);
+ arrow_args.push_back(garrow_datum_get_raw(datum));
+ }
+ const arrow::compute::FunctionOptions *arrow_options;
+ if (options) {
+ arrow_options = garrow_function_options_get_raw(options);
+ } else {
+ arrow_options = arrow_function->default_options();
+ }
+ arrow::Result<arrow::Datum> arrow_result_result;
+ if (context) {
+ auto arrow_context = garrow_execute_context_get_raw(context);
+ arrow_result_result = arrow_function->Execute(arrow_args,
+ arrow_options,
+ arrow_context);
+ } else {
+ arrow::compute::ExecContext arrow_context;
+ arrow_result_result = arrow_function->Execute(arrow_args,
+ arrow_options,
+ &arrow_context);
+ }
+ if (garrow::check(error, arrow_result_result, "[function][execute]")) {
+ auto arrow_result = *arrow_result_result;
+ return garrow_datum_new_raw(&arrow_result);
+ } else {
+ return NULL;
+ }
+}
+
+
typedef struct GArrowCastOptionsPrivate_ {
+ GArrowDataType *to_data_type;
arrow::compute::CastOptions options;
} GArrowCastOptionsPrivate;
enum {
- PROP_ALLOW_INT_OVERFLOW = 1,
+ PROP_TO_DATA_TYPE = 1,
+ PROP_ALLOW_INT_OVERFLOW,
PROP_ALLOW_TIME_TRUNCATE,
+ PROP_ALLOW_TIME_OVERFLOW,
+ PROP_ALLOW_DECIMAL_TRUNCATE,
PROP_ALLOW_FLOAT_TRUNCATE,
PROP_ALLOW_INVALID_UTF8,
};
-G_DEFINE_TYPE_WITH_PRIVATE(GArrowCastOptions,
- garrow_cast_options,
- G_TYPE_OBJECT)
+static arrow::compute::FunctionOptions *
+garrow_cast_options_get_raw_function_options(GArrowFunctionOptions *options)
+{
+ return garrow_cast_options_get_raw(GARROW_CAST_OPTIONS(options));
+}
+
+static void
+garrow_cast_options_function_options_interface_init(
+ GArrowFunctionOptionsInterface *iface)
+{
+ iface->get_raw = garrow_cast_options_get_raw_function_options;
+}
+
+G_DEFINE_TYPE_WITH_CODE(GArrowCastOptions,
+ garrow_cast_options,
+ G_TYPE_OBJECT,
+ G_ADD_PRIVATE(GArrowCastOptions)
+ G_IMPLEMENT_INTERFACE(
+ GARROW_TYPE_FUNCTION_OPTIONS,
+ garrow_cast_options_function_options_interface_init))
#define GARROW_CAST_OPTIONS_GET_PRIVATE(object) \
static_cast<GArrowCastOptionsPrivate *>( \
@@ -112,6 +360,19 @@ G_DEFINE_TYPE_WITH_PRIVATE(GArrowCastOptions,
GARROW_CAST_OPTIONS(object)))
static void
+garrow_cast_options_dispose(GObject *object)
+{
+ auto priv = GARROW_CAST_OPTIONS_GET_PRIVATE(object);
+
+ if (priv->to_data_type) {
+ g_object_unref(priv->to_data_type);
+ priv->to_data_type = NULL;
+ }
+
+ G_OBJECT_CLASS(garrow_cast_options_parent_class)->dispose(object);
+}
+
+static void
garrow_cast_options_finalize(GObject *object)
{
auto priv = GARROW_CAST_OPTIONS_GET_PRIVATE(object);
@@ -128,12 +389,33 @@ garrow_cast_options_set_property(GObject *object,
auto priv = GARROW_CAST_OPTIONS_GET_PRIVATE(object);
switch (prop_id) {
+ case PROP_TO_DATA_TYPE:
+ {
+ auto to_data_type = g_value_dup_object(value);
+ if (priv->to_data_type) {
+ g_object_unref(priv->to_data_type);
+ }
+ if (to_data_type) {
+ priv->to_data_type = GARROW_DATA_TYPE(to_data_type);
+ priv->options.to_type = garrow_data_type_get_raw(priv->to_data_type);
+ } else {
+ priv->to_data_type = NULL;
+ priv->options.to_type = nullptr;
+ }
+ break;
+ }
case PROP_ALLOW_INT_OVERFLOW:
priv->options.allow_int_overflow = g_value_get_boolean(value);
break;
case PROP_ALLOW_TIME_TRUNCATE:
priv->options.allow_time_truncate = g_value_get_boolean(value);
break;
+ case PROP_ALLOW_TIME_OVERFLOW:
+ priv->options.allow_time_overflow = g_value_get_boolean(value);
+ break;
+ case PROP_ALLOW_DECIMAL_TRUNCATE:
+ priv->options.allow_decimal_truncate = g_value_get_boolean(value);
+ break;
case PROP_ALLOW_FLOAT_TRUNCATE:
priv->options.allow_float_truncate = g_value_get_boolean(value);
break;
@@ -155,12 +437,21 @@ garrow_cast_options_get_property(GObject *object,
auto priv = GARROW_CAST_OPTIONS_GET_PRIVATE(object);
switch (prop_id) {
+ case PROP_TO_DATA_TYPE:
+ g_value_set_object(value, priv->to_data_type);
+ break;
case PROP_ALLOW_INT_OVERFLOW:
g_value_set_boolean(value, priv->options.allow_int_overflow);
break;
case PROP_ALLOW_TIME_TRUNCATE:
g_value_set_boolean(value, priv->options.allow_time_truncate);
break;
+ case PROP_ALLOW_TIME_OVERFLOW:
+ g_value_set_boolean(value, priv->options.allow_time_overflow);
+ break;
+ case PROP_ALLOW_DECIMAL_TRUNCATE:
+ g_value_set_boolean(value, priv->options.allow_decimal_truncate);
+ break;
case PROP_ALLOW_FLOAT_TRUNCATE:
g_value_set_boolean(value, priv->options.allow_float_truncate);
break;
@@ -185,11 +476,27 @@ garrow_cast_options_class_init(GArrowCastOptionsClass
*klass)
{
auto gobject_class = G_OBJECT_CLASS(klass);
+ gobject_class->dispose = garrow_cast_options_dispose;
gobject_class->finalize = garrow_cast_options_finalize;
gobject_class->set_property = garrow_cast_options_set_property;
gobject_class->get_property = garrow_cast_options_get_property;
GParamSpec *spec;
+
+ /**
+ * GArrowCastOptions:to-data-type:
+ *
+ * The #GArrowDataType to cast to.
+ *
+ * Since: 1.0.0
+ */
+ spec = g_param_spec_object("to-data-type",
+ "To data type",
+ "The GArrowDataType being casted to",
+ GARROW_TYPE_DATA_TYPE,
+ static_cast<GParamFlags>(G_PARAM_READWRITE));
+ g_object_class_install_property(gobject_class, PROP_TO_DATA_TYPE, spec);
+
/**
* GArrowCastOptions:allow-int-overflow:
*
@@ -219,6 +526,34 @@ garrow_cast_options_class_init(GArrowCastOptionsClass
*klass)
g_object_class_install_property(gobject_class, PROP_ALLOW_TIME_TRUNCATE,
spec);
/**
+ * GArrowCastOptions:allow-time-overflow:
+ *
+ * Whether time overflow is allowed or not.
+ *
+ * Since: 1.0.0
+ */
+ spec = g_param_spec_boolean("allow-time-overflow",
+ "Allow time overflow",
+ "Whether time overflow is allowed or not",
+ FALSE,
+ static_cast<GParamFlags>(G_PARAM_READWRITE));
+ g_object_class_install_property(gobject_class, PROP_ALLOW_TIME_OVERFLOW,
spec);
+
+ /**
+ * GArrowCastOptions:allow-decimal-truncate:
+ *
+ * Whether truncating decimal value is allowed or not.
+ *
+ * Since: 1.0.0
+ */
+ spec = g_param_spec_boolean("allow-decimal-truncate",
+ "Allow decimal truncate",
+ "Whether truncating decimal value is allowed or
not",
+ FALSE,
+ static_cast<GParamFlags>(G_PARAM_READWRITE));
+ g_object_class_install_property(gobject_class, PROP_ALLOW_DECIMAL_TRUNCATE,
spec);
+
+ /**
* GArrowCastOptions:allow-float-truncate:
*
* Whether truncating float value is allowed or not.
@@ -270,9 +605,26 @@ enum {
PROP_MODE = 1,
};
-G_DEFINE_TYPE_WITH_PRIVATE(GArrowCountOptions,
- garrow_count_options,
- G_TYPE_OBJECT)
+static arrow::compute::FunctionOptions *
+garrow_count_options_get_raw_function_options(GArrowFunctionOptions *options)
+{
+ return garrow_count_options_get_raw(GARROW_COUNT_OPTIONS(options));
+}
+
+static void
+garrow_count_options_function_options_interface_init(
+ GArrowFunctionOptionsInterface *iface)
+{
+ iface->get_raw = garrow_count_options_get_raw_function_options;
+}
+
+G_DEFINE_TYPE_WITH_CODE(GArrowCountOptions,
+ garrow_count_options,
+ G_TYPE_OBJECT,
+ G_ADD_PRIVATE(GArrowCountOptions)
+ G_IMPLEMENT_INTERFACE(
+ GARROW_TYPE_FUNCTION_OPTIONS,
+
garrow_count_options_function_options_interface_init))
#define GARROW_COUNT_OPTIONS_GET_PRIVATE(object) \
static_cast<GArrowCountOptionsPrivate *>( \
@@ -380,9 +732,26 @@ enum {
PROP_NULL_SELECTION_BEHAVIOR = 1,
};
-G_DEFINE_TYPE_WITH_PRIVATE(GArrowFilterOptions,
- garrow_filter_options,
- G_TYPE_OBJECT)
+static arrow::compute::FunctionOptions *
+garrow_filter_options_get_raw_function_options(GArrowFunctionOptions *options)
+{
+ return garrow_filter_options_get_raw(GARROW_FILTER_OPTIONS(options));
+}
+
+static void
+garrow_filter_options_function_options_interface_init(
+ GArrowFunctionOptionsInterface *iface)
+{
+ iface->get_raw = garrow_filter_options_get_raw_function_options;
+}
+
+G_DEFINE_TYPE_WITH_CODE(GArrowFilterOptions,
+ garrow_filter_options,
+ G_TYPE_OBJECT,
+ G_ADD_PRIVATE(GArrowFilterOptions)
+ G_IMPLEMENT_INTERFACE(
+ GARROW_TYPE_FUNCTION_OPTIONS,
+
garrow_filter_options_function_options_interface_init))
#define GARROW_FILTER_OPTIONS_GET_PRIVATE(object) \
static_cast<GArrowFilterOptionsPrivate *>( \
@@ -489,9 +858,26 @@ typedef struct GArrowTakeOptionsPrivate_ {
arrow::compute::TakeOptions options;
} GArrowTakeOptionsPrivate;
-G_DEFINE_TYPE_WITH_PRIVATE(GArrowTakeOptions,
- garrow_take_options,
- G_TYPE_OBJECT)
+static arrow::compute::FunctionOptions *
+garrow_take_options_get_raw_function_options(GArrowFunctionOptions *options)
+{
+ return garrow_take_options_get_raw(GARROW_TAKE_OPTIONS(options));
+}
+
+static void
+garrow_take_options_function_options_interface_init(
+ GArrowFunctionOptionsInterface *iface)
+{
+ iface->get_raw = garrow_take_options_get_raw_function_options;
+}
+
+G_DEFINE_TYPE_WITH_CODE(GArrowTakeOptions,
+ garrow_take_options,
+ G_TYPE_OBJECT,
+ G_ADD_PRIVATE(GArrowTakeOptions)
+ G_IMPLEMENT_INTERFACE(
+ GARROW_TYPE_FUNCTION_OPTIONS,
+ garrow_take_options_function_options_interface_init))
#define GARROW_TAKE_OPTIONS_GET_PRIVATE(object) \
static_cast<GArrowTakeOptionsPrivate *>( \
@@ -544,9 +930,26 @@ enum {
PROP_OPERATOR = 1,
};
-G_DEFINE_TYPE_WITH_PRIVATE(GArrowCompareOptions,
- garrow_compare_options,
- G_TYPE_OBJECT)
+static arrow::compute::FunctionOptions *
+garrow_compare_options_get_raw_function_options(GArrowFunctionOptions *options)
+{
+ return garrow_compare_options_get_raw(GARROW_COMPARE_OPTIONS(options));
+}
+
+static void
+garrow_compare_options_function_options_interface_init(
+ GArrowFunctionOptionsInterface *iface)
+{
+ iface->get_raw = garrow_compare_options_get_raw_function_options;
+}
+
+G_DEFINE_TYPE_WITH_CODE(GArrowCompareOptions,
+ garrow_compare_options,
+ G_TYPE_OBJECT,
+ G_ADD_PRIVATE(GArrowCompareOptions)
+ G_IMPLEMENT_INTERFACE(
+ GARROW_TYPE_FUNCTION_OPTIONS,
+
garrow_compare_options_function_options_interface_init))
#define GARROW_COMPARE_OPTIONS_GET_PRIVATE(object) \
static_cast<GArrowCompareOptionsPrivate *>( \
@@ -2024,13 +2427,49 @@ garrow_record_batch_filter(GArrowRecordBatch
*record_batch,
G_END_DECLS
+arrow::compute::ExecContext *
+garrow_execute_context_get_raw(GArrowExecuteContext *context)
+{
+ auto priv = GARROW_EXECUTE_CONTEXT_GET_PRIVATE(context);
+ return &priv->context;
+}
+
+arrow::compute::FunctionOptions *
+garrow_function_options_get_raw(GArrowFunctionOptions *options)
+{
+ auto iface = GARROW_FUNCTION_OPTIONS_GET_IFACE(options);
+ return iface->get_raw(options);
+}
+
+GArrowFunction *
+garrow_function_new_raw(std::shared_ptr<arrow::compute::Function>
*arrow_function)
+{
+ return GARROW_FUNCTION(g_object_new(GARROW_TYPE_FUNCTION,
+ "function", arrow_function,
+ NULL));
+}
+
+std::shared_ptr<arrow::compute::Function>
+garrow_function_get_raw(GArrowFunction *function)
+{
+ auto priv = GARROW_FUNCTION_GET_PRIVATE(function);
+ return priv->function;
+}
+
GArrowCastOptions *
garrow_cast_options_new_raw(arrow::compute::CastOptions *arrow_cast_options)
{
+ GArrowDataType *to_data_type = NULL;
+ if (arrow_cast_options->to_type) {
+ to_data_type = garrow_data_type_new_raw(&(arrow_cast_options->to_type));
+ }
auto cast_options =
g_object_new(GARROW_TYPE_CAST_OPTIONS,
+ "to-data-type", to_data_type,
"allow-int-overflow", arrow_cast_options->allow_int_overflow,
"allow-time-truncate",
arrow_cast_options->allow_time_truncate,
+ "allow-time-overflow",
arrow_cast_options->allow_time_overflow,
+ "allow-decimal-truncate",
arrow_cast_options->allow_decimal_truncate,
"allow-float-truncate",
arrow_cast_options->allow_float_truncate,
"allow-invalid-utf8", arrow_cast_options->allow_invalid_utf8,
NULL);
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index dcad463..48fdc3a 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -19,13 +19,56 @@
#pragma once
-#include <arrow-glib/array.h>
-#include <arrow-glib/chunked-array.h>
-#include <arrow-glib/record-batch.h>
-#include <arrow-glib/table.h>
+#include <arrow-glib/datum.h>
G_BEGIN_DECLS
+#define GARROW_TYPE_EXECUTE_CONTEXT (garrow_execute_context_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowExecuteContext,
+ garrow_execute_context,
+ GARROW,
+ EXECUTE_CONTEXT,
+ GObject)
+struct _GArrowExecuteContextClass
+{
+ GObjectClass parent_class;
+};
+
+GARROW_AVAILABLE_IN_1_0
+GArrowExecuteContext *garrow_execute_context_new(void);
+
+
+#define GARROW_TYPE_FUNCTION_OPTIONS (garrow_function_options_get_type())
+G_DECLARE_INTERFACE(GArrowFunctionOptions,
+ garrow_function_options,
+ GARROW,
+ FUNCTION_OPTIONS,
+ GObject)
+
+
+#define GARROW_TYPE_FUNCTION (garrow_function_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowFunction,
+ garrow_function,
+ GARROW,
+ FUNCTION,
+ GObject)
+struct _GArrowFunctionClass
+{
+ GObjectClass parent_class;
+};
+
+
+GARROW_AVAILABLE_IN_1_0
+GArrowFunction *garrow_function_find(const gchar *name);
+
+GARROW_AVAILABLE_IN_1_0
+GArrowDatum *garrow_function_execute(GArrowFunction *function,
+ GList *args,
+ GArrowFunctionOptions *options,
+ GArrowExecuteContext *context,
+ GError **error);
+
+
#define GARROW_TYPE_CAST_OPTIONS (garrow_cast_options_get_type())
G_DECLARE_DERIVABLE_TYPE(GArrowCastOptions,
garrow_cast_options,
@@ -156,7 +199,7 @@ GArrowArray *garrow_array_cast(GArrowArray *array,
GArrowCastOptions *options,
GError **error);
GArrowArray *garrow_array_unique(GArrowArray *array,
- GError **error);
+ GError **error);
GArrowDictionaryArray *garrow_array_dictionary_encode(GArrowArray *array,
GError **error);
GARROW_AVAILABLE_IN_0_13
diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp
index fe5022e..dc32b32 100644
--- a/c_glib/arrow-glib/compute.hpp
+++ b/c_glib/arrow-glib/compute.hpp
@@ -23,6 +23,26 @@
#include <arrow-glib/compute.h>
+
+struct _GArrowFunctionOptionsInterface
+{
+ GTypeInterface parent_iface;
+
+ arrow::compute::FunctionOptions *(*get_raw)(GArrowFunctionOptions *options);
+};
+
+
+arrow::compute::ExecContext *
+garrow_execute_context_get_raw(GArrowExecuteContext *context);
+
+arrow::compute::FunctionOptions *
+garrow_function_options_get_raw(GArrowFunctionOptions *options);
+
+GArrowFunction *
+garrow_function_new_raw(std::shared_ptr<arrow::compute::Function>
*arrow_function);
+std::shared_ptr<arrow::compute::Function>
+garrow_function_get_raw(GArrowFunction *function);
+
GArrowCastOptions *garrow_cast_options_new_raw(arrow::compute::CastOptions
*arrow_cast_options);
arrow::compute::CastOptions *garrow_cast_options_get_raw(GArrowCastOptions
*cast_options);
diff --git a/c_glib/arrow-glib/datum.cpp b/c_glib/arrow-glib/datum.cpp
index c1f71ad..ceb1140 100644
--- a/c_glib/arrow-glib/datum.cpp
+++ b/c_glib/arrow-glib/datum.cpp
@@ -49,7 +49,7 @@ G_BEGIN_DECLS
*/
typedef struct GArrowDatumPrivate_ {
- std::shared_ptr<arrow::Datum> datum;
+ arrow::Datum datum;
} GArrowDatumPrivate;
enum {
@@ -68,7 +68,7 @@ garrow_datum_finalize(GObject *object)
{
auto priv = GARROW_DATUM_GET_PRIVATE(object);
- priv->datum.~shared_ptr();
+ priv->datum.~Datum();
G_OBJECT_CLASS(garrow_datum_parent_class)->finalize(object);
}
@@ -83,8 +83,7 @@ garrow_datum_set_property(GObject *object,
switch (prop_id) {
case PROP_DATUM:
- priv->datum =
- *static_cast<std::shared_ptr<arrow::Datum>
*>(g_value_get_pointer(value));
+ priv->datum = *static_cast<arrow::Datum *>(g_value_get_pointer(value));
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
@@ -96,7 +95,7 @@ static void
garrow_datum_init(GArrowDatum *object)
{
auto priv = GARROW_DATUM_GET_PRIVATE(object);
- new(&priv->datum) std::shared_ptr<arrow::Datum>;
+ new(&priv->datum) arrow::Datum;
}
static void
@@ -110,7 +109,7 @@ garrow_datum_class_init(GArrowDatumClass *klass)
GParamSpec *spec;
spec = g_param_spec_pointer("datum",
"Datum",
- "The raw std::shared_ptr<arrow::Datum> *",
+ "The raw arrow::Datum *",
static_cast<GParamFlags>(G_PARAM_WRITABLE |
G_PARAM_CONSTRUCT_ONLY));
g_object_class_install_property(gobject_class, PROP_DATUM, spec);
@@ -128,8 +127,8 @@ garrow_datum_class_init(GArrowDatumClass *klass)
gboolean
garrow_datum_is_array(GArrowDatum *datum)
{
- const auto arrow_datum = garrow_datum_get_raw(datum);
- return arrow_datum->is_array();
+ const auto &arrow_datum = garrow_datum_get_raw(datum);
+ return arrow_datum.is_array();
}
/**
@@ -144,8 +143,8 @@ garrow_datum_is_array(GArrowDatum *datum)
gboolean
garrow_datum_is_array_like(GArrowDatum *datum)
{
- const auto arrow_datum = garrow_datum_get_raw(datum);
- return arrow_datum->is_arraylike();
+ const auto &arrow_datum = garrow_datum_get_raw(datum);
+ return arrow_datum.is_arraylike();
}
/**
@@ -161,9 +160,9 @@ garrow_datum_is_array_like(GArrowDatum *datum)
gboolean
garrow_datum_equal(GArrowDatum *datum, GArrowDatum *other_datum)
{
- const auto arrow_datum = garrow_datum_get_raw(datum);
- const auto arrow_other_datum = garrow_datum_get_raw(other_datum);
- return arrow_datum->Equals(*arrow_other_datum);
+ const auto &arrow_datum = garrow_datum_get_raw(datum);
+ const auto &arrow_other_datum = garrow_datum_get_raw(other_datum);
+ return arrow_datum.Equals(arrow_other_datum);
}
/**
@@ -179,8 +178,8 @@ garrow_datum_equal(GArrowDatum *datum, GArrowDatum
*other_datum)
gchar *
garrow_datum_to_string(GArrowDatum *datum)
{
- const auto arrow_datum = garrow_datum_get_raw(datum);
- return g_strdup(arrow_datum->ToString().c_str());
+ const auto &arrow_datum = garrow_datum_get_raw(datum);
+ return g_strdup(arrow_datum.ToString().c_str());
}
@@ -286,7 +285,7 @@ GArrowArrayDatum *
garrow_array_datum_new(GArrowArray *value)
{
auto arrow_value = garrow_array_get_raw(value);
- auto arrow_datum = std::make_shared<arrow::Datum>(arrow_value);
+ arrow::Datum arrow_datum(arrow_value);
return garrow_array_datum_new_raw(&arrow_datum, value);
}
@@ -389,7 +388,7 @@ GArrowChunkedArrayDatum *
garrow_chunked_array_datum_new(GArrowChunkedArray *value)
{
auto arrow_value = garrow_chunked_array_get_raw(value);
- auto arrow_datum = std::make_shared<arrow::Datum>(arrow_value);
+ arrow::Datum arrow_datum(arrow_value);
return garrow_chunked_array_datum_new_raw(&arrow_datum, value);
}
@@ -492,7 +491,7 @@ GArrowRecordBatchDatum *
garrow_record_batch_datum_new(GArrowRecordBatch *value)
{
auto arrow_value = garrow_record_batch_get_raw(value);
- auto arrow_datum = std::make_shared<arrow::Datum>(arrow_value);
+ arrow::Datum arrow_datum(arrow_value);
return garrow_record_batch_datum_new_raw(&arrow_datum, value);
}
@@ -595,22 +594,60 @@ GArrowTableDatum *
garrow_table_datum_new(GArrowTable *value)
{
auto arrow_value = garrow_table_get_raw(value);
- auto arrow_datum = std::make_shared<arrow::Datum>(arrow_value);
+ arrow::Datum arrow_datum(arrow_value);
return garrow_table_datum_new_raw(&arrow_datum, value);
}
G_END_DECLS
-std::shared_ptr<arrow::Datum>
+arrow::Datum
garrow_datum_get_raw(GArrowDatum *datum)
{
auto priv = GARROW_DATUM_GET_PRIVATE(datum);
return priv->datum;
}
+GArrowDatum *
+garrow_datum_new_raw(arrow::Datum *arrow_datum)
+{
+ switch (arrow_datum->kind()) {
+ case arrow::Datum::ARRAY:
+ {
+ auto arrow_array = arrow_datum->make_array();
+ auto array = garrow_array_new_raw(&arrow_array);
+ return GARROW_DATUM(garrow_array_datum_new_raw(arrow_datum, array));
+ }
+ case arrow::Datum::CHUNKED_ARRAY:
+ {
+ auto arrow_chunked_array = arrow_datum->chunked_array();
+ auto chunked_array = garrow_chunked_array_new_raw(&arrow_chunked_array);
+ auto chunked_array_datum =
+ garrow_chunked_array_datum_new_raw(arrow_datum, chunked_array);
+ return GARROW_DATUM(chunked_array_datum);
+ }
+ case arrow::Datum::RECORD_BATCH:
+ {
+ auto arrow_record_batch = arrow_datum->record_batch();
+ auto record_batch = garrow_record_batch_new_raw(&arrow_record_batch);
+ auto record_batch_datum =
+ garrow_record_batch_datum_new_raw(arrow_datum, record_batch);
+ return GARROW_DATUM(record_batch_datum);
+ }
+ case arrow::Datum::TABLE:
+ {
+ auto arrow_table = arrow_datum->table();
+ auto table = garrow_table_new_raw(&arrow_table);
+ return GARROW_DATUM(garrow_table_datum_new_raw(arrow_datum, table));
+ }
+ default:
+ // TODO
+ return NULL;
+ }
+}
+
GArrowArrayDatum *
-garrow_array_datum_new_raw(std::shared_ptr<arrow::Datum> *arrow_datum,
+garrow_array_datum_new_raw(arrow::Datum *arrow_datum,
GArrowArray *value)
{
return GARROW_ARRAY_DATUM(g_object_new(GARROW_TYPE_ARRAY_DATUM,
@@ -620,7 +657,7 @@ garrow_array_datum_new_raw(std::shared_ptr<arrow::Datum>
*arrow_datum,
}
GArrowChunkedArrayDatum *
-garrow_chunked_array_datum_new_raw(std::shared_ptr<arrow::Datum> *arrow_datum,
+garrow_chunked_array_datum_new_raw(arrow::Datum *arrow_datum,
GArrowChunkedArray *value)
{
return
GARROW_CHUNKED_ARRAY_DATUM(g_object_new(GARROW_TYPE_CHUNKED_ARRAY_DATUM,
@@ -630,7 +667,7 @@
garrow_chunked_array_datum_new_raw(std::shared_ptr<arrow::Datum> *arrow_datum,
}
GArrowRecordBatchDatum *
-garrow_record_batch_datum_new_raw(std::shared_ptr<arrow::Datum> *arrow_datum,
+garrow_record_batch_datum_new_raw(arrow::Datum *arrow_datum,
GArrowRecordBatch *value)
{
return GARROW_RECORD_BATCH_DATUM(g_object_new(GARROW_TYPE_RECORD_BATCH_DATUM,
@@ -640,7 +677,7 @@
garrow_record_batch_datum_new_raw(std::shared_ptr<arrow::Datum> *arrow_datum,
}
GArrowTableDatum *
-garrow_table_datum_new_raw(std::shared_ptr<arrow::Datum> *arrow_datum,
+garrow_table_datum_new_raw(arrow::Datum *arrow_datum,
GArrowTable *value)
{
return GARROW_TABLE_DATUM(g_object_new(GARROW_TYPE_TABLE_DATUM,
diff --git a/c_glib/arrow-glib/datum.hpp b/c_glib/arrow-glib/datum.hpp
index 669de36..673501f 100644
--- a/c_glib/arrow-glib/datum.hpp
+++ b/c_glib/arrow-glib/datum.hpp
@@ -23,18 +23,20 @@
#include <arrow-glib/datum.h>
-std::shared_ptr<arrow::Datum>
+arrow::Datum
garrow_datum_get_raw(GArrowDatum *datum);
+GArrowDatum *
+garrow_datum_new_raw(arrow::Datum *arrow_datum);
GArrowArrayDatum *
-garrow_array_datum_new_raw(std::shared_ptr<arrow::Datum> *arrow_datum,
+garrow_array_datum_new_raw(arrow::Datum *arrow_datum,
GArrowArray *value);
GArrowChunkedArrayDatum *
-garrow_chunked_array_datum_new_raw(std::shared_ptr<arrow::Datum> *arrow_datum,
+garrow_chunked_array_datum_new_raw(arrow::Datum *arrow_datum,
GArrowChunkedArray *value);
GArrowRecordBatchDatum *
-garrow_record_batch_datum_new_raw(std::shared_ptr<arrow::Datum> *arrow_datum,
+garrow_record_batch_datum_new_raw(arrow::Datum *arrow_datum,
GArrowRecordBatch *value);
GArrowTableDatum *
-garrow_table_datum_new_raw(std::shared_ptr<arrow::Datum> *arrow_datum,
+garrow_table_datum_new_raw(arrow::Datum *arrow_datum,
GArrowTable *value);
diff --git a/c_glib/arrow-glib/file.cpp b/c_glib/arrow-glib/file.cpp
index 8743132..c4ab3ea 100644
--- a/c_glib/arrow-glib/file.cpp
+++ b/c_glib/arrow-glib/file.cpp
@@ -42,7 +42,7 @@ G_DEFINE_INTERFACE(GArrowFile,
G_TYPE_OBJECT)
static void
-garrow_file_default_init (GArrowFileInterface *iface)
+garrow_file_default_init(GArrowFileInterface *iface)
{
}
@@ -55,7 +55,7 @@ garrow_file_default_init (GArrowFileInterface *iface)
*/
gboolean
garrow_file_close(GArrowFile *file,
- GError **error)
+ GError **error)
{
auto arrow_file = garrow_file_get_raw(file);
diff --git a/c_glib/arrow-glib/input-stream.cpp b/c_glib/arrow-glib/input-stream.cpp
index 7b8419b..3751d41 100644
--- a/c_glib/arrow-glib/input-stream.cpp
+++ b/c_glib/arrow-glib/input-stream.cpp
@@ -108,7 +108,7 @@ G_DEFINE_TYPE_WITH_CODE(GArrowInputStream,
G_IMPLEMENT_INTERFACE(GARROW_TYPE_FILE,
garrow_input_stream_file_interface_init)
G_IMPLEMENT_INTERFACE(GARROW_TYPE_READABLE,
-
garrow_input_stream_readable_interface_init));
+
garrow_input_stream_readable_interface_init))
#define GARROW_INPUT_STREAM_GET_PRIVATE(obj) \
static_cast<GArrowInputStreamPrivate *>( \
diff --git a/c_glib/arrow-glib/reader.cpp b/c_glib/arrow-glib/reader.cpp
index 11b82e0..43b813b 100644
--- a/c_glib/arrow-glib/reader.cpp
+++ b/c_glib/arrow-glib/reader.cpp
@@ -1683,10 +1683,10 @@ garrow_json_read_options_set_property(GObject *object,
break;
case PROP_JSON_READER_SCHEMA:
{
+ auto schema = g_value_dup_object(value);
if (priv->schema) {
g_object_unref(priv->schema);
}
- auto schema = g_value_dup_object(value);
if (schema) {
priv->schema = GARROW_SCHEMA(schema);
priv->parse_options.explicit_schema =
garrow_schema_get_raw(priv->schema);
diff --git a/c_glib/test/helper/buildable.rb b/c_glib/test/helper/buildable.rb
index 0826a93..9ec8fb4 100644
--- a/c_glib/test/helper/buildable.rb
+++ b/c_glib/test/helper/buildable.rb
@@ -113,6 +113,19 @@ module Helper
build_array(Arrow::LargeStringArrayBuilder.new, values)
end
+ def build_decimal128_array(value_data_type, values)
+ values = values.collect do |value|
+ case value
+ when String
+ Arrow::Decimal128.new(value)
+ else
+ value
+ end
+ end
+ build_array(Arrow::Decimal128ArrayBuilder.new(value_data_type),
+ values)
+ end
+
def build_list_array(value_data_type, values_list, field_name: "value")
value_field = Arrow::Field.new(field_name, value_data_type)
data_type = Arrow::ListDataType.new(value_field)
diff --git a/c_glib/test/test-cast.rb b/c_glib/test/test-cast.rb
index 82a6a9a..528a0e8 100644
--- a/c_glib/test/test-cast.rb
+++ b/c_glib/test/test-cast.rb
@@ -43,10 +43,11 @@ class TestCast < Test::Unit::TestCase
sub_test_case("allow-time-truncate") do
def test_default
- after_epoch = 1504953190854 # 2017-09-09T10:33:10.854Z
- second_timestamp = Arrow::TimestampDataType.new(:second)
+ after_epoch_in_milli = 1504953190854 # 2017-09-09T10:33:10.854Z
+ second_timestamp_data_type = Arrow::TimestampDataType.new(:second)
+ milli_array = build_timestamp_array(:milli, [after_epoch_in_milli])
assert_raise(Arrow::Error::Invalid) do
- build_timestamp_array(:milli, [after_epoch]).cast(second_timestamp)
+ milli_array.cast(second_timestamp_data_type)
end
end
@@ -57,9 +58,56 @@ class TestCast < Test::Unit::TestCase
second_array = build_timestamp_array(:second,
[after_epoch_in_milli / 1000])
milli_array = build_timestamp_array(:milli, [after_epoch_in_milli])
- second_timestamp = Arrow::TimestampDataType.new(:second)
+ second_timestamp_data_type = Arrow::TimestampDataType.new(:second)
assert_equal(second_array,
- milli_array.cast(second_timestamp, options))
+ milli_array.cast(second_timestamp_data_type, options))
+ end
+ end
+
+ sub_test_case("allow-time-overflow") do
+ def test_default
+ after_epoch_in_second = 95617584000 # 5000-01-01T00:00:00Z
+ nano_timestamp_data_type = Arrow::TimestampDataType.new(:nano)
+ second_array = build_timestamp_array(:second, [after_epoch_in_second])
+ assert_raise(Arrow::Error::Invalid) do
+ second_array.cast(nano_timestamp_data_type)
+ end
+ end
+
+ def test_true
+ options = Arrow::CastOptions.new
+ options.allow_time_overflow = true
+ after_epoch_in_second = 95617584000 # 5000-01-01T00:00:00Z
+ second_array = build_timestamp_array(:second,
+ [after_epoch_in_second])
+ after_epoch_in_nano_overflowed =
+ (after_epoch_in_second * 1000 * 1000 * 1000) % (2 ** 64)
+ nano_array = build_timestamp_array(:nano,
+ [after_epoch_in_nano_overflowed])
+ nano_timestamp_data_type = Arrow::TimestampDataType.new(:nano)
+ assert_equal(nano_array,
+ second_array.cast(nano_timestamp_data_type, options))
+ end
+ end
+
+ sub_test_case("allow-decimal-truncate") do
+ def test_default
+ decimal128_data_type = Arrow::Decimal128DataType.new(8, 2)
+ decimal128_array = build_decimal128_array(decimal128_data_type,
+ ["23423445"])
+ assert_raise(Arrow::Error::Invalid) do
+ decimal128_array.cast(Arrow::Int64DataType.new)
+ end
+ end
+
+ def test_true
+ options = Arrow::CastOptions.new
+ options.allow_decimal_truncate = true
+ decimal128_data_type = Arrow::Decimal128DataType.new(8, 2)
+ decimal128_array = build_decimal128_array(decimal128_data_type,
+ ["23423445"])
+ assert_equal(build_int64_array([234234]),
+ decimal128_array.cast(Arrow::Int64DataType.new, options))
end
end
diff --git a/c_glib/test/test-function.rb b/c_glib/test/test-function.rb
new file mode 100644
index 0000000..8530ea5
--- /dev/null
+++ b/c_glib/test/test-function.rb
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestFunction < Test::Unit::TestCase
+ include Helper::Buildable
+
+ sub_test_case("#execute") do
+ def test_array
+ or_function = Arrow::Function.find("or")
+ args = [
+ Arrow::ArrayDatum.new(build_boolean_array([true, false, false])),
+ Arrow::ArrayDatum.new(build_boolean_array([true, false, true])),
+ ]
+ assert_equal(build_boolean_array([true, false, true]),
+ or_function.execute(args).value)
+ end
+
+ def test_chunked_array
+ or_function = Arrow::Function.find("or")
+ chunked_arrays = [
+ Arrow::ChunkedArray.new([
+ build_boolean_array([true]),
+ build_boolean_array([false, false]),
+ ]),
+ Arrow::ChunkedArray.new([
+ build_boolean_array([true, false]),
+ build_boolean_array([true]),
+ ]),
+ ]
+ args = chunked_arrays.collect do |chunked_array|
+ Arrow::ChunkedArrayDatum.new(chunked_array)
+ end
+ expected_array = build_boolean_array([true, false, true])
+ expected = Arrow::ChunkedArray.new([expected_array])
+ assert_equal(expected,
+ or_function.execute(args).value)
+ end
+
+ def test_options
+ cast_function = Arrow::Function.find("cast")
+ args = [
+ Arrow::ArrayDatum.new(build_string_array(["1", "2", "-3"])),
+ ]
+ options = Arrow::CastOptions.new
+ options.to_data_type = Arrow::Int8DataType.new
+ assert_equal(build_int8_array([1, 2, -3]),
+ cast_function.execute(args, options).value)
+ end
+ end
+end