Repository: arrow Updated Branches: refs/heads/master fee447510 -> 62a17b7e8
ARROW-1044: [GLib] Support Feather Author: Kouhei Sutou <k...@clear-code.com> Closes #699 from kou/glib-support-feather and squashes the following commits: 75ce6119 [Kouhei Sutou] [GLib] Support Feather Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/62a17b7e Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/62a17b7e Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/62a17b7e Branch: refs/heads/master Commit: 62a17b7e823162f7a07ddb9747e4ed0df3e1c9ca Parents: fee4475 Author: Kouhei Sutou <k...@clear-code.com> Authored: Wed May 17 15:13:12 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Wed May 17 15:13:12 2017 -0400 ---------------------------------------------------------------------- c_glib/arrow-glib/reader.cpp | 308 ++++++++++++++++++++++++++- c_glib/arrow-glib/reader.h | 72 +++++++ c_glib/arrow-glib/reader.hpp | 4 + c_glib/arrow-glib/writer.cpp | 216 ++++++++++++++++++- c_glib/arrow-glib/writer.h | 60 ++++++ c_glib/arrow-glib/writer.hpp | 4 + c_glib/test/test-feather-file-reader.rb | 155 ++++++++++++++ c_glib/test/test-feather-file-writer.rb | 68 ++++++ 8 files changed, 883 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/62a17b7e/c_glib/arrow-glib/reader.cpp ---------------------------------------------------------------------- diff --git a/c_glib/arrow-glib/reader.cpp b/c_glib/arrow-glib/reader.cpp index c092324..3ff6ba1 100644 --- a/c_glib/arrow-glib/reader.cpp +++ b/c_glib/arrow-glib/reader.cpp @@ -21,8 +21,7 @@ # include <config.h> #endif -#include <arrow/ipc/api.h> - +#include <arrow-glib/column.hpp> #include <arrow-glib/error.hpp> #include <arrow-glib/record-batch.hpp> #include <arrow-glib/schema.hpp> @@ -48,6 +47,9 @@ G_BEGIN_DECLS * * #GArrowRecordBatchFileReader is a class for reading record * batches in file format from input. 
+ * + * #GArrowFeatherFileReader is a class for reading columns in Feather + * file format from input. */ typedef struct GArrowRecordBatchReaderPrivate_ { @@ -419,6 +421,288 @@ garrow_record_batch_file_reader_get_record_batch(GArrowRecordBatchFileReader *re } } + +typedef struct GArrowFeatherFileReaderPrivate_ { + arrow::ipc::feather::TableReader *feather_table_reader; +} GArrowFeatherFileReaderPrivate; + +enum { + PROP_0__, + PROP_FEATHER_TABLE_READER +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowFeatherFileReader, + garrow_feather_file_reader, + G_TYPE_OBJECT); + +#define GARROW_FEATHER_FILE_READER_GET_PRIVATE(obj) \ + (G_TYPE_INSTANCE_GET_PRIVATE((obj), \ + GARROW_TYPE_FEATHER_FILE_READER, \ + GArrowFeatherFileReaderPrivate)) + +static void +garrow_feather_file_reader_finalize(GObject *object) +{ + GArrowFeatherFileReaderPrivate *priv; + + priv = GARROW_FEATHER_FILE_READER_GET_PRIVATE(object); + + delete priv->feather_table_reader; + + G_OBJECT_CLASS(garrow_feather_file_reader_parent_class)->finalize(object); +} + +static void +garrow_feather_file_reader_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + GArrowFeatherFileReaderPrivate *priv; + + priv = GARROW_FEATHER_FILE_READER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FEATHER_TABLE_READER: + priv->feather_table_reader = + static_cast<arrow::ipc::feather::TableReader *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_feather_file_reader_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_feather_file_reader_init(GArrowFeatherFileReader *object) +{ +} + +static void +garrow_feather_file_reader_class_init(GArrowFeatherFileReaderClass *klass) +{ + GObjectClass *gobject_class; + GParamSpec *spec; + 
+ gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_feather_file_reader_finalize; + gobject_class->set_property = garrow_feather_file_reader_set_property; + gobject_class->get_property = garrow_feather_file_reader_get_property; + + spec = g_param_spec_pointer("feather-table-reader", + "arrow::ipc::feather::TableReader", + "The raw std::shared<arrow::ipc::feather::TableReader> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FEATHER_TABLE_READER, spec); +} + + +/** + * garrow_feather_file_reader_new: + * @file: The file to be read. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowFeatherFileReader + * or %NULL on error. + * + * Since: 0.4.0 + */ +GArrowFeatherFileReader * +garrow_feather_file_reader_new(GArrowSeekableInputStream *file, + GError **error) +{ + auto arrow_random_access_file = garrow_seekable_input_stream_get_raw(file); + std::unique_ptr<arrow::ipc::feather::TableReader> arrow_reader; + auto status = + arrow::ipc::feather::TableReader::Open(arrow_random_access_file, + &arrow_reader); + if (garrow_error_check(error, status, "[feather-file-reader][new]")) { + return garrow_feather_file_reader_new_raw(arrow_reader.release()); + } else { + return NULL; + } +} + +/** + * garrow_feather_file_reader_get_description: + * @reader: A #GArrowFeatherFileReader. + * + * Returns: (nullable): The description of the file if it exists, + * %NULL otherwise. You can confirm whether description exists or not by + * garrow_feather_file_reader_has_description(). + * It should be freed with g_free() when no longer needed. 
+ * + * Since: 0.4.0 + */ +gchar * +garrow_feather_file_reader_get_description(GArrowFeatherFileReader *reader) +{ + auto arrow_reader = garrow_feather_file_reader_get_raw(reader); + if (arrow_reader->HasDescription()) { + auto description = arrow_reader->GetDescription(); + return g_strndup(description.data(), + description.size()); + } else { + return NULL; + } +} + +/** + * garrow_feather_file_reader_has_description: + * @reader: A #GArrowFeatherFileReader. + * + * Returns: Whether the file has description or not. + * + * Since: 0.4.0 + */ +gboolean +garrow_feather_file_reader_has_description(GArrowFeatherFileReader *reader) +{ + auto arrow_reader = garrow_feather_file_reader_get_raw(reader); + return arrow_reader->HasDescription(); +} + +/** + * garrow_feather_file_reader_get_version: + * @reader: A #GArrowFeatherFileReader. + * + * Returns: The format version of the file. + * + * Since: 0.4.0 + */ +gint +garrow_feather_file_reader_get_version(GArrowFeatherFileReader *reader) +{ + auto arrow_reader = garrow_feather_file_reader_get_raw(reader); + return arrow_reader->version(); +} + +/** + * garrow_feather_file_reader_get_n_rows: + * @reader: A #GArrowFeatherFileReader. + * + * Returns: The number of rows in the file. + * + * Since: 0.4.0 + */ +gint64 +garrow_feather_file_reader_get_n_rows(GArrowFeatherFileReader *reader) +{ + auto arrow_reader = garrow_feather_file_reader_get_raw(reader); + return arrow_reader->num_rows(); +} + +/** + * garrow_feather_file_reader_get_n_columns: + * @reader: A #GArrowFeatherFileReader. + * + * Returns: The number of columns in the file. + * + * Since: 0.4.0 + */ +gint64 +garrow_feather_file_reader_get_n_columns(GArrowFeatherFileReader *reader) +{ + auto arrow_reader = garrow_feather_file_reader_get_raw(reader); + return arrow_reader->num_columns(); +} + +/** + * garrow_feather_file_reader_get_column_name: + * @reader: A #GArrowFeatherFileReader. + * @i: The index of the target column. 
+ * + * Returns: The i-th column name in the file. + * It should be freed with g_free() when no longer needed. + * + * Since: 0.4.0 + */ +gchar * +garrow_feather_file_reader_get_column_name(GArrowFeatherFileReader *reader, + gint i) +{ + auto arrow_reader = garrow_feather_file_reader_get_raw(reader); + auto column_name = arrow_reader->GetColumnName(i); + return g_strndup(column_name.data(), + column_name.size()); +} + +/** + * garrow_feather_file_reader_get_column: + * @reader: A #GArrowFeatherFileReader. + * @i: The index of the target column. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable) (transfer full): + * The i-th column in the file or %NULL on error. + * + * Since: 0.4.0 + */ +GArrowColumn * +garrow_feather_file_reader_get_column(GArrowFeatherFileReader *reader, + gint i, + GError **error) +{ + auto arrow_reader = garrow_feather_file_reader_get_raw(reader); + std::shared_ptr<arrow::Column> arrow_column; + auto status = arrow_reader->GetColumn(i, &arrow_column); + + if (garrow_error_check(error, status, "[feather-file-reader][get-column]")) { + return garrow_column_new_raw(&arrow_column); + } else { + return NULL; + } +} + +/** + * garrow_feather_file_reader_get_columns: + * @reader: A #GArrowFeatherFileReader. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (element-type GArrowColumn) (transfer full): + * The columns in the file. 
+ * + * Since: 0.4.0 + */ +GList * +garrow_feather_file_reader_get_columns(GArrowFeatherFileReader *reader, + GError **error) +{ + GList *columns = NULL; + auto arrow_reader = garrow_feather_file_reader_get_raw(reader); + auto n_columns = arrow_reader->num_columns(); + for (gint i = 0; i < n_columns; ++i) { + std::shared_ptr<arrow::Column> arrow_column; + auto status = arrow_reader->GetColumn(i, &arrow_column); + if (!garrow_error_check(error, + status, + "[feather-file-reader][get-columns]")) { + g_list_foreach(columns, (GFunc)g_object_unref, NULL); + g_list_free(columns); + return NULL; + } + columns = g_list_prepend(columns, + garrow_column_new_raw(&arrow_column)); + } + return g_list_reverse(columns); +} + + G_END_DECLS GArrowRecordBatchReader * @@ -470,3 +754,23 @@ garrow_record_batch_file_reader_get_raw(GArrowRecordBatchFileReader *reader) priv = GARROW_RECORD_BATCH_FILE_READER_GET_PRIVATE(reader); return priv->record_batch_file_reader; } + +GArrowFeatherFileReader * +garrow_feather_file_reader_new_raw(arrow::ipc::feather::TableReader *arrow_reader) +{ + auto reader = + GARROW_FEATHER_FILE_READER( + g_object_new(GARROW_TYPE_FEATHER_FILE_READER, + "feather-table-reader", arrow_reader, + NULL)); + return reader; +} + +arrow::ipc::feather::TableReader * +garrow_feather_file_reader_get_raw(GArrowFeatherFileReader *reader) +{ + GArrowFeatherFileReaderPrivate *priv; + + priv = GARROW_FEATHER_FILE_READER_GET_PRIVATE(reader); + return priv->feather_table_reader; +} http://git-wip-us.apache.org/repos/asf/arrow/blob/62a17b7e/c_glib/arrow-glib/reader.h ---------------------------------------------------------------------- diff --git a/c_glib/arrow-glib/reader.h b/c_glib/arrow-glib/reader.h index 477204b..b8bdbe2 100644 --- a/c_glib/arrow-glib/reader.h +++ b/c_glib/arrow-glib/reader.h @@ -193,4 +193,76 @@ GArrowRecordBatch *garrow_record_batch_file_reader_get_record_batch( guint i, GError **error); + +#define GARROW_TYPE_FEATHER_FILE_READER \ + 
(garrow_feather_file_reader_get_type()) +#define GARROW_FEATHER_FILE_READER(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj), \ + GARROW_TYPE_FEATHER_FILE_READER, \ + GArrowFeatherFileReader)) +#define GARROW_FEATHER_FILE_READER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass), \ + GARROW_TYPE_FEATHER_FILE_READER, \ + GArrowFeatherFileReaderClass)) +#define GARROW_IS_FEATHER_FILE_READER(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj), \ + GARROW_TYPE_FEATHER_FILE_READER)) +#define GARROW_IS_FEATHER_FILE_READER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass), \ + GARROW_TYPE_FEATHER_FILE_READER)) +#define GARROW_FEATHER_FILE_READER_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS((obj), \ + GARROW_TYPE_FEATHER_FILE_READER, \ + GArrowFeatherFileReaderClass)) + +typedef struct _GArrowFeatherFileReader GArrowFeatherFileReader; +#ifndef __GTK_DOC_IGNORE__ +typedef struct _GArrowFeatherFileReaderClass GArrowFeatherFileReaderClass; +#endif + +/** + * GArrowFeatherFileReader: + * + * It wraps `arrow::ipc::feather::TableReader`. 
+ */ +struct _GArrowFeatherFileReader +{ + /*< private >*/ + GObject parent_instance; +}; + +#ifndef __GTK_DOC_IGNORE__ +struct _GArrowFeatherFileReaderClass +{ + GObjectClass parent_class; +}; +#endif + +GType garrow_feather_file_reader_get_type(void) G_GNUC_CONST; + +GArrowFeatherFileReader *garrow_feather_file_reader_new( + GArrowSeekableInputStream *file, + GError **error); + +gchar *garrow_feather_file_reader_get_description( + GArrowFeatherFileReader *reader); +gboolean garrow_feather_file_reader_has_description( + GArrowFeatherFileReader *reader); +gint garrow_feather_file_reader_get_version( + GArrowFeatherFileReader *reader); +gint64 garrow_feather_file_reader_get_n_rows( + GArrowFeatherFileReader *reader); +gint64 garrow_feather_file_reader_get_n_columns( + GArrowFeatherFileReader *reader); +gchar *garrow_feather_file_reader_get_column_name( + GArrowFeatherFileReader *reader, + gint i); +GArrowColumn *garrow_feather_file_reader_get_column( + GArrowFeatherFileReader *reader, + gint i, + GError **error); +GList *garrow_feather_file_reader_get_columns( + GArrowFeatherFileReader *reader, + GError **error); + G_END_DECLS http://git-wip-us.apache.org/repos/asf/arrow/blob/62a17b7e/c_glib/arrow-glib/reader.hpp ---------------------------------------------------------------------- diff --git a/c_glib/arrow-glib/reader.hpp b/c_glib/arrow-glib/reader.hpp index 7d0b24a..7970e73 100644 --- a/c_glib/arrow-glib/reader.hpp +++ b/c_glib/arrow-glib/reader.hpp @@ -21,6 +21,7 @@ #include <arrow/api.h> #include <arrow/ipc/api.h> +#include <arrow/ipc/feather.h> #include <arrow-glib/reader.h> @@ -31,3 +32,6 @@ GArrowRecordBatchStreamReader *garrow_record_batch_stream_reader_new_raw(std::sh GArrowRecordBatchFileReader *garrow_record_batch_file_reader_new_raw(std::shared_ptr<arrow::ipc::RecordBatchFileReader> *arrow_reader); std::shared_ptr<arrow::ipc::RecordBatchFileReader> garrow_record_batch_file_reader_get_raw(GArrowRecordBatchFileReader *reader); + +GArrowFeatherFileReader 
*garrow_feather_file_reader_new_raw(arrow::ipc::feather::TableReader *arrow_reader); +arrow::ipc::feather::TableReader *garrow_feather_file_reader_get_raw(GArrowFeatherFileReader *reader); http://git-wip-us.apache.org/repos/asf/arrow/blob/62a17b7e/c_glib/arrow-glib/writer.cpp ---------------------------------------------------------------------- diff --git a/c_glib/arrow-glib/writer.cpp b/c_glib/arrow-glib/writer.cpp index 20dd281..d4c36c2 100644 --- a/c_glib/arrow-glib/writer.cpp +++ b/c_glib/arrow-glib/writer.cpp @@ -21,8 +21,6 @@ # include <config.h> #endif -#include <arrow/ipc/api.h> - #include <arrow-glib/array.hpp> #include <arrow-glib/error.hpp> #include <arrow-glib/record-batch.hpp> @@ -48,6 +46,9 @@ G_BEGIN_DECLS * * #GArrowRecordBatchFileWriter is a class for writing record * batches in file format into output. + * + * #GArrowFeatherFileWriter is a class for writing arrays + * in Feather file format into output. */ typedef struct GArrowRecordBatchWriterPrivate_ { @@ -271,8 +272,199 @@ garrow_record_batch_file_writer_new(GArrowOutputStream *sink, } } + +typedef struct GArrowFeatherFileWriterPrivate_ { + arrow::ipc::feather::TableWriter *feather_table_writer; +} GArrowFeatherFileWriterPrivate; + +enum { + PROP_0_, + PROP_FEATHER_TABLE_WRITER +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowFeatherFileWriter, + garrow_feather_file_writer, + G_TYPE_OBJECT); + +#define GARROW_FEATHER_FILE_WRITER_GET_PRIVATE(obj) \ + (G_TYPE_INSTANCE_GET_PRIVATE((obj), \ + GARROW_TYPE_FEATHER_FILE_WRITER, \ + GArrowFeatherFileWriterPrivate)) + +static void +garrow_feather_file_writer_finalize(GObject *object) +{ + GArrowFeatherFileWriterPrivate *priv; + + priv = GARROW_FEATHER_FILE_WRITER_GET_PRIVATE(object); + + delete priv->feather_table_writer; + + G_OBJECT_CLASS(garrow_feather_file_writer_parent_class)->finalize(object); +} + +static void +garrow_feather_file_writer_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + 
GArrowFeatherFileWriterPrivate *priv; + + priv = GARROW_FEATHER_FILE_WRITER_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_FEATHER_TABLE_WRITER: + priv->feather_table_writer = + static_cast<arrow::ipc::feather::TableWriter *>(g_value_get_pointer(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_feather_file_writer_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + switch (prop_id) { + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_feather_file_writer_init(GArrowFeatherFileWriter *object) +{ +} + +static void +garrow_feather_file_writer_class_init(GArrowFeatherFileWriterClass *klass) +{ + GObjectClass *gobject_class; + GParamSpec *spec; + + gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->finalize = garrow_feather_file_writer_finalize; + gobject_class->set_property = garrow_feather_file_writer_set_property; + gobject_class->get_property = garrow_feather_file_writer_get_property; + + spec = g_param_spec_pointer("feather-table-writer", + "arrow::ipc::feather::TableWriter", + "The raw std::shared<arrow::ipc::feather::TableWriter> *", + static_cast<GParamFlags>(G_PARAM_WRITABLE | + G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_FEATHER_TABLE_WRITER, spec); +} + +/** + * garrow_feather_file_writer_new: + * @sink: The output of the writer. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (nullable): A newly created #GArrowFeatherFileWriter + * or %NULL on error. 
+ * + * Since: 0.4.0 + */ +GArrowFeatherFileWriter * +garrow_feather_file_writer_new(GArrowOutputStream *sink, + GError **error) +{ + auto arrow_sink = garrow_output_stream_get_raw(sink); + std::unique_ptr<arrow::ipc::feather::TableWriter> arrow_writer; + auto status = arrow::ipc::feather::TableWriter::Open(arrow_sink, + &arrow_writer); + if (garrow_error_check(error, status, "[feature-file-writer][new]")) { + return garrow_feather_file_writer_new_raw(arrow_writer.release()); + } else { + return NULL; + } +} + +/** + * garrow_feather_file_writer_set_description: + * @writer: A #GArrowFeatherFileWriter. + * @description: The description of the file. + * + * Since: 0.4.0 + */ +void +garrow_feather_file_writer_set_description(GArrowFeatherFileWriter *writer, + const gchar *description) +{ + auto arrow_writer = garrow_feather_file_writer_get_raw(writer); + arrow_writer->SetDescription(std::string(description)); +} + +/** + * garrow_feather_file_writer_set_n_rows: + * @writer: A #GArrowFeatherFileWriter. + * @n_rows: The number of rows in the file. + * + * Since: 0.4.0 + */ +void +garrow_feather_file_writer_set_n_rows(GArrowFeatherFileWriter *writer, + gint64 n_rows) +{ + auto arrow_writer = garrow_feather_file_writer_get_raw(writer); + arrow_writer->SetNumRows(n_rows); +} + +/** + * garrow_feather_file_writer_append: + * @writer: A #GArrowFeatherFileWriter. + * @name: The name of the array to be appended. + * @array: The array to be appended. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. 
+ * + * Since: 0.4.0 + */ +gboolean +garrow_feather_file_writer_append(GArrowFeatherFileWriter *writer, + const gchar *name, + GArrowArray *array, + GError **error) +{ + auto arrow_writer = garrow_feather_file_writer_get_raw(writer); + auto arrow_array = garrow_array_get_raw(array); + + auto status = arrow_writer->Append(std::string(name), *arrow_array); + return garrow_error_check(error, + status, + "[feather-file-writer][append]"); +} + +/** + * garrow_feather_file_writer_close: + * @writer: A #GArrowFeatherFileWriter. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * Since: 0.4.0 + */ +gboolean +garrow_feather_file_writer_close(GArrowFeatherFileWriter *writer, + GError **error) +{ + auto arrow_writer = garrow_feather_file_writer_get_raw(writer); + + auto status = arrow_writer->Finalize(); + return garrow_error_check(error, status, "[feather-file-writer][close]"); +} + G_END_DECLS + GArrowRecordBatchWriter * garrow_record_batch_writer_new_raw(std::shared_ptr<arrow::ipc::RecordBatchWriter> *arrow_writer) { @@ -314,3 +506,23 @@ garrow_record_batch_file_writer_new_raw(std::shared_ptr<arrow::ipc::RecordBatchF NULL)); return writer; } + +GArrowFeatherFileWriter * +garrow_feather_file_writer_new_raw(arrow::ipc::feather::TableWriter *arrow_writer) +{ + auto writer = + GARROW_FEATHER_FILE_WRITER( + g_object_new(GARROW_TYPE_FEATHER_FILE_WRITER, + "feather-table-writer", arrow_writer, + NULL)); + return writer; +} + +arrow::ipc::feather::TableWriter * +garrow_feather_file_writer_get_raw(GArrowFeatherFileWriter *writer) +{ + GArrowFeatherFileWriterPrivate *priv; + + priv = GARROW_FEATHER_FILE_WRITER_GET_PRIVATE(writer); + return priv->feather_table_writer; +} http://git-wip-us.apache.org/repos/asf/arrow/blob/62a17b7e/c_glib/arrow-glib/writer.h ---------------------------------------------------------------------- diff --git a/c_glib/arrow-glib/writer.h b/c_glib/arrow-glib/writer.h index 
9a98461..3853c2b 100644 --- a/c_glib/arrow-glib/writer.h +++ b/c_glib/arrow-glib/writer.h @@ -185,4 +185,64 @@ GArrowRecordBatchFileWriter *garrow_record_batch_file_writer_new( GArrowSchema *schema, GError **error); + +#define GARROW_TYPE_FEATHER_FILE_WRITER \ + (garrow_feather_file_writer_get_type()) +#define GARROW_FEATHER_FILE_WRITER(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj), \ + GARROW_TYPE_FEATHER_FILE_WRITER, \ + GArrowFeatherFileWriter)) +#define GARROW_FEATHER_FILE_WRITER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass), \ + GARROW_TYPE_FEATHER_FILE_WRITER, \ + GArrowFeatherFileWriterClass)) +#define GARROW_IS_FEATHER_FILE_WRITER(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj), \ + GARROW_TYPE_FEATHER_FILE_WRITER)) +#define GARROW_IS_FEATHER_FILE_WRITER_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass), \ + GARROW_TYPE_FEATHER_FILE_WRITER)) +#define GARROW_FEATHER_FILE_WRITER_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS((obj), \ + GARROW_TYPE_FEATHER_FILE_WRITER, \ + GArrowFeatherFileWriterClass)) + +typedef struct _GArrowFeatherFileWriter GArrowFeatherFileWriter; +#ifndef __GTK_DOC_IGNORE__ +typedef struct _GArrowFeatherFileWriterClass GArrowFeatherFileWriterClass; +#endif + +/** + * GArrowFeatherFileWriter: + * + * It wraps `arrow::ipc::feather::TableWriter`. 
+ */ +struct _GArrowFeatherFileWriter +{ + /*< private >*/ + GObject parent_instance; +}; + +#ifndef __GTK_DOC_IGNORE__ +struct _GArrowFeatherFileWriterClass +{ + GObjectClass parent_class; +}; +#endif + +GType garrow_feather_file_writer_get_type(void) G_GNUC_CONST; + +GArrowFeatherFileWriter *garrow_feather_file_writer_new(GArrowOutputStream *sink, + GError **error); +void garrow_feather_file_writer_set_description(GArrowFeatherFileWriter *writer, + const gchar *description); +void garrow_feather_file_writer_set_n_rows(GArrowFeatherFileWriter *writer, + gint64 n_rows); +gboolean garrow_feather_file_writer_append(GArrowFeatherFileWriter *writer, + const gchar *name, + GArrowArray *array, + GError **error); +gboolean garrow_feather_file_writer_close(GArrowFeatherFileWriter *writer, + GError **error); + G_END_DECLS http://git-wip-us.apache.org/repos/asf/arrow/blob/62a17b7e/c_glib/arrow-glib/writer.hpp ---------------------------------------------------------------------- diff --git a/c_glib/arrow-glib/writer.hpp b/c_glib/arrow-glib/writer.hpp index c9ea660..d57f69b 100644 --- a/c_glib/arrow-glib/writer.hpp +++ b/c_glib/arrow-glib/writer.hpp @@ -21,6 +21,7 @@ #include <arrow/api.h> #include <arrow/ipc/api.h> +#include <arrow/ipc/feather.h> #include <arrow-glib/writer.h> @@ -30,3 +31,6 @@ std::shared_ptr<arrow::ipc::RecordBatchWriter> garrow_record_batch_writer_get_ra GArrowRecordBatchStreamWriter *garrow_record_batch_stream_writer_new_raw(std::shared_ptr<arrow::ipc::RecordBatchStreamWriter> *arrow_writer); GArrowRecordBatchFileWriter *garrow_record_batch_file_writer_new_raw(std::shared_ptr<arrow::ipc::RecordBatchFileWriter> *arrow_writer); + +GArrowFeatherFileWriter *garrow_feather_file_writer_new_raw(arrow::ipc::feather::TableWriter *arrow_writer); +arrow::ipc::feather::TableWriter *garrow_feather_file_writer_get_raw(GArrowFeatherFileWriter *writer); http://git-wip-us.apache.org/repos/asf/arrow/blob/62a17b7e/c_glib/test/test-feather-file-reader.rb 
---------------------------------------------------------------------- diff --git a/c_glib/test/test-feather-file-reader.rb b/c_glib/test/test-feather-file-reader.rb new file mode 100644 index 0000000..91f2f74 --- /dev/null +++ b/c_glib/test/test-feather-file-reader.rb @@ -0,0 +1,155 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestFeatherFileReader < Test::Unit::TestCase + include Helper::Buildable + + def setup_file(data) + tempfile = Tempfile.open("arrow-feather-file-reader") + output = Arrow::FileOutputStream.new(tempfile.path, false) + begin + writer = Arrow::FeatherFileWriter.new(output) + begin + if data[:description] + writer.description = data[:description] + end + writer.n_rows = data[:n_rows] || 0 + if data[:columns] + data[:columns].each do |name, array| + writer.append(name, array) + end + end + ensure + writer.close + end + ensure + output.close + end + + input = Arrow::MemoryMappedInputStream.new(tempfile.path) + begin + reader = Arrow::FeatherFileReader.new(input) + yield(reader) + ensure + input.close + end + end + + sub_test_case("#description") do + test("exist") do + setup_file(:description => "Log") do |reader| + assert_equal("Log", reader.description) + end + end + + test("not exist") do + setup_file(:description => nil) do |reader| + assert_nil(reader.description) + end + end + end + + sub_test_case("#has_description?") do + test("exist") do + setup_file(:description => "Log") do |reader| + assert do + reader.has_description? + end + end + end + + test("not exist") do + setup_file(:description => nil) do |reader| + assert do + not reader.has_description? 
+ end + end + end + end + + test("#version") do + setup_file({}) do |reader| + assert do + reader.version >= 2 + end + end + end + + test("#n_rows") do + setup_file(:n_rows => 3) do |reader| + assert_equal(3, reader.n_rows) + end + end + + test("#n_columns") do + columns = { + "messages" => build_string_array([]), + "is_critical" => build_boolean_array([]), + } + setup_file(:columns => columns) do |reader| + assert_equal(2, reader.n_columns) + end + end + + test("#get_column_name") do + columns = { + "messages" => build_string_array([]), + "is_critical" => build_boolean_array([]), + } + setup_file(:columns => columns) do |reader| + assert_equal([ + "messages", + "is_critical", + ], + [ + reader.get_column_name(0), + reader.get_column_name(1), + ]) + end + end + + test("#get_column") do + columns = { + "messages" => build_string_array([]), + "is_critical" => build_boolean_array([]), + } + setup_file(:columns => columns) do |reader| + assert_equal([ + "messages", + "is_critical", + ], + [ + reader.get_column(0).name, + reader.get_column(1).name, + ]) + end + end + + test("#columns") do + columns = { + "messages" => build_string_array([]), + "is_critical" => build_boolean_array([]), + } + setup_file(:columns => columns) do |reader| + assert_equal([ + "messages", + "is_critical", + ], + reader.columns.collect(&:name)) + end + end +end http://git-wip-us.apache.org/repos/asf/arrow/blob/62a17b7e/c_glib/test/test-feather-file-writer.rb ---------------------------------------------------------------------- diff --git a/c_glib/test/test-feather-file-writer.rb b/c_glib/test/test-feather-file-writer.rb new file mode 100644 index 0000000..9939215 --- /dev/null +++ b/c_glib/test/test-feather-file-writer.rb @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFeatherFileWriter < Test::Unit::TestCase + include Helper::Buildable + + def test_append + tempfile = Tempfile.open("arrow-feather-file-writer") + output = Arrow::FileOutputStream.new(tempfile.path, false) + begin + writer = Arrow::FeatherFileWriter.new(output) + begin + writer.description = "Log" + writer.n_rows = 3 + writer.append("message", + build_string_array(["Crash", "Error", "Shutdown"])) + writer.append("is_critical", + build_boolean_array([true, true, false])) + ensure + writer.close + end + ensure + output.close + end + + input = Arrow::MemoryMappedInputStream.new(tempfile.path) + begin + reader = Arrow::FeatherFileReader.new(input) + assert_equal([true, "Log"], + [reader.has_description?, reader.description]) + column_values = {} + reader.columns.each do |column| + values = [] + column.data.chunks.each do |array| + array.length.times do |j| + if array.respond_to?(:get_string) + values << array.get_string(j) + else + values << array.get_value(j) + end + end + end + column_values[column.name] = values + end + assert_equal({ + "message" => ["Crash", "Error", "Shutdown"], + "is_critical" => [true, true, false], + }, + column_values) + ensure + input.close + end + end +end