[ 
https://issues.apache.org/jira/browse/ARROW-1849?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16265337#comment-16265337
 ] 

ASF GitHub Bot commented on ARROW-1849:
---------------------------------------

wesm closed pull request #1351: ARROW-1849: [GLib] Add input checks to 
GArrowRecordBatch
URL: https://github.com/apache/arrow/pull/1351
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/c_glib/arrow-glib/record-batch.cpp 
b/c_glib/arrow-glib/record-batch.cpp
index f23a0cf75..64f2020ad 100644
--- a/c_glib/arrow-glib/record-batch.cpp
+++ b/c_glib/arrow-glib/record-batch.cpp
@@ -28,6 +28,23 @@
 
 #include <sstream>
 
+static inline bool
+garrow_record_batch_adjust_index(const std::shared_ptr<arrow::RecordBatch> 
arrow_record_batch,
+                                 gint &i)
+{
+  auto n_columns = arrow_record_batch->num_columns();
+  if (i < 0) {
+    i += n_columns;
+    if (i < 0) {
+      return false;
+    }
+  }
+  if (i >= n_columns) {
+    return false;
+  }
+  return true;
+}
+
 G_BEGIN_DECLS
 
 /**
@@ -135,13 +152,15 @@ garrow_record_batch_class_init(GArrowRecordBatchClass 
*klass)
  * @schema: The schema of the record batch.
  * @n_rows: The number of the rows in the record batch.
  * @columns: (element-type GArrowArray): The columns in the record batch.
+ * @error: (nullable): Return location for a #GError or %NULL.
  *
- * Returns: A newly created #GArrowRecordBatch.
+ * Returns: (nullable): A newly created #GArrowRecordBatch or %NULL on error.
  */
 GArrowRecordBatch *
 garrow_record_batch_new(GArrowSchema *schema,
                         guint32 n_rows,
-                        GList *columns)
+                        GList *columns,
+                        GError **error)
 {
   std::vector<std::shared_ptr<arrow::Array>> arrow_columns;
   for (GList *node = columns; node; node = node->next) {
@@ -152,7 +171,12 @@ garrow_record_batch_new(GArrowSchema *schema,
   auto arrow_record_batch =
     arrow::RecordBatch::Make(garrow_schema_get_raw(schema),
                              n_rows, arrow_columns);
-  return garrow_record_batch_new_raw(&arrow_record_batch);
+  auto status = arrow_record_batch->Validate();
+  if (garrow_error_check(error, status, "[record-batch][new]")) {
+    return garrow_record_batch_new_raw(&arrow_record_batch);
+  } else {
+    return NULL;
+  }
 }
 
 /**
@@ -192,15 +216,21 @@ garrow_record_batch_get_schema(GArrowRecordBatch 
*record_batch)
 /**
  * garrow_record_batch_get_column:
  * @record_batch: A #GArrowRecordBatch.
- * @i: The index of the target column.
+ * @i: The index of the target column. If it's negative, index is
+ *   counted backward from the end of the columns. `-1` means the last
+ *   column.
  *
- * Returns: (transfer full): The i-th column in the record batch.
+ * Returns: (transfer full) (nullable): The i-th column in the record batch
+ *   on success, %NULL on out of index.
  */
 GArrowArray *
 garrow_record_batch_get_column(GArrowRecordBatch *record_batch,
-                               guint i)
+                               gint i)
 {
   const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch);
+  if (!garrow_record_batch_adjust_index(arrow_record_batch, i)) {
+    return NULL;
+  }
   auto arrow_column = arrow_record_batch->column(i);
   return garrow_array_new_raw(&arrow_column);
 }
@@ -230,15 +260,21 @@ garrow_record_batch_get_columns(GArrowRecordBatch 
*record_batch)
 /**
  * garrow_record_batch_get_column_name:
  * @record_batch: A #GArrowRecordBatch.
- * @i: The index of the target column.
+ * @i: The index of the target column. If it's negative, index is
+ *   counted backward from the end of the columns. `-1` means the last
+ *   column.
  *
- * Returns: The name of the i-th column in the record batch.
+ * Returns: (nullable): The name of the i-th column in the record batch
+ *   on success, %NULL on out of index
  */
 const gchar *
 garrow_record_batch_get_column_name(GArrowRecordBatch *record_batch,
-                                    guint i)
+                                    gint i)
 {
   const auto arrow_record_batch = garrow_record_batch_get_raw(record_batch);
+  if (!garrow_record_batch_adjust_index(arrow_record_batch, i)) {
+    return NULL;
+  }
   return arrow_record_batch->column_name(i).c_str();
 }
 
diff --git a/c_glib/arrow-glib/record-batch.h b/c_glib/arrow-glib/record-batch.h
index 021f894f3..d31edf439 100644
--- a/c_glib/arrow-glib/record-batch.h
+++ b/c_glib/arrow-glib/record-batch.h
@@ -68,17 +68,18 @@ GType garrow_record_batch_get_type(void) G_GNUC_CONST;
 
 GArrowRecordBatch *garrow_record_batch_new(GArrowSchema *schema,
                                            guint32 n_rows,
-                                           GList *columns);
+                                           GList *columns,
+                                           GError **error);
 
 gboolean garrow_record_batch_equal(GArrowRecordBatch *record_batch,
                                    GArrowRecordBatch *other_record_batch);
 
 GArrowSchema *garrow_record_batch_get_schema     (GArrowRecordBatch 
*record_batch);
 GArrowArray  *garrow_record_batch_get_column     (GArrowRecordBatch 
*record_batch,
-                                                  guint i);
+                                                  gint i);
 GList        *garrow_record_batch_get_columns    (GArrowRecordBatch 
*record_batch);
 const gchar  *garrow_record_batch_get_column_name(GArrowRecordBatch 
*record_batch,
-                                                  guint i);
+                                                  gint i);
 guint         garrow_record_batch_get_n_columns  (GArrowRecordBatch 
*record_batch);
 gint64        garrow_record_batch_get_n_rows     (GArrowRecordBatch 
*record_batch);
 GArrowRecordBatch *garrow_record_batch_slice     (GArrowRecordBatch 
*record_batch,
diff --git a/c_glib/example/go/read-batch.go b/c_glib/example/go/read-batch.go
index ef1a7fb5a..1472939cd 100644
--- a/c_glib/example/go/read-batch.go
+++ b/c_glib/example/go/read-batch.go
@@ -57,8 +57,8 @@ func PrintColumnValue(column *arrow.Array, i int64) {
 func PrintRecordBatch(recordBatch *arrow.RecordBatch) {
        nColumns := recordBatch.GetNColumns()
        for i := uint32(0); i < nColumns; i++ {
-               column := recordBatch.GetColumn(i)
-               columnName := recordBatch.GetColumnName(i)
+               column := recordBatch.GetColumn(int32(i))
+               columnName := recordBatch.GetColumnName(int32(i))
                fmt.Printf("  %s: [", columnName)
                nRows := recordBatch.GetNRows()
                for j := int64(0); j < nRows; j++ {
diff --git a/c_glib/example/go/read-stream.go b/c_glib/example/go/read-stream.go
index 7bd076473..ed75a96c9 100644
--- a/c_glib/example/go/read-stream.go
+++ b/c_glib/example/go/read-stream.go
@@ -57,8 +57,8 @@ func PrintColumnValue(column *arrow.Array, i int64) {
 func PrintRecordBatch(recordBatch *arrow.RecordBatch) {
        nColumns := recordBatch.GetNColumns()
        for i := uint32(0); i < nColumns; i++ {
-               column := recordBatch.GetColumn(i)
-               columnName := recordBatch.GetColumnName(i)
+               column := recordBatch.GetColumn(int32(i))
+               columnName := recordBatch.GetColumnName(int32(i))
                fmt.Printf("  %s: [", columnName)
                nRows := recordBatch.GetNRows()
                for j := int64(0); j < nRows; j++ {
diff --git a/c_glib/example/go/write-batch.go b/c_glib/example/go/write-batch.go
index 9dbc3c00a..f4d03ed92 100644
--- a/c_glib/example/go/write-batch.go
+++ b/c_glib/example/go/write-batch.go
@@ -188,7 +188,10 @@ func main() {
                BuildDoubleArray(),
        }
 
-       recordBatch := arrow.NewRecordBatch(schema, 4, columns)
+       recordBatch, err := arrow.NewRecordBatch(schema, 4, columns)
+       if err != nil {
+               log.Fatalf("Failed to create record batch #1: %v", err)
+       }
        _, err = writer.WriteRecordBatch(recordBatch)
        if err != nil {
                log.Fatalf("Failed to write record batch #1: %v", err)
@@ -198,7 +201,10 @@ func main() {
        for i, column := range columns {
                slicedColumns[i] = column.Slice(1, 3)
        }
-       recordBatch = arrow.NewRecordBatch(schema, 3, slicedColumns)
+       recordBatch, err = arrow.NewRecordBatch(schema, 3, slicedColumns)
+       if err != nil {
+               log.Fatalf("Failed to create record batch #2: %v", err)
+       }
        _, err = writer.WriteRecordBatch(recordBatch)
        if err != nil {
                log.Fatalf("Failed to write record batch #2: %v", err)
diff --git a/c_glib/example/go/write-stream.go 
b/c_glib/example/go/write-stream.go
index 244741e8c..7225156a7 100644
--- a/c_glib/example/go/write-stream.go
+++ b/c_glib/example/go/write-stream.go
@@ -188,7 +188,10 @@ func main() {
                BuildDoubleArray(),
        }
 
-       recordBatch := arrow.NewRecordBatch(schema, 4, columns)
+       recordBatch, err := arrow.NewRecordBatch(schema, 4, columns)
+       if err != nil {
+               log.Fatalf("Failed to create record batch #1: %v", err)
+       }
        _, err = writer.WriteRecordBatch(recordBatch)
        if err != nil {
                log.Fatalf("Failed to write record batch #1: %v", err)
@@ -198,7 +201,10 @@ func main() {
        for i, column := range columns {
                slicedColumns[i] = column.Slice(1, 3)
        }
-       recordBatch = arrow.NewRecordBatch(schema, 3, slicedColumns)
+       recordBatch, err = arrow.NewRecordBatch(schema, 3, slicedColumns)
+       if err != nil {
+               log.Fatalf("Failed to create record batch #2: %v", err)
+       }
        writer.WriteRecordBatch(recordBatch)
        _, err = writer.WriteRecordBatch(recordBatch)
        if err != nil {
diff --git a/c_glib/test/test-record-batch.rb b/c_glib/test/test-record-batch.rb
index 9fd34b7d4..365922f49 100644
--- a/c_glib/test/test-record-batch.rb
+++ b/c_glib/test/test-record-batch.rb
@@ -18,32 +18,53 @@
 class TestTable < Test::Unit::TestCase
   include Helper::Buildable
 
-  def test_new
-    fields = [
-      Arrow::Field.new("visible", Arrow::BooleanDataType.new),
-      Arrow::Field.new("valid", Arrow::BooleanDataType.new),
-    ]
-    schema = Arrow::Schema.new(fields)
-    columns = [
-      build_boolean_array([true]),
-      build_boolean_array([false]),
-    ]
-    record_batch = Arrow::RecordBatch.new(schema, 1, columns)
-    assert_equal(1, record_batch.n_rows)
+  sub_test_case(".new") do
+    def test_valid
+      fields = [
+        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
+        Arrow::Field.new("valid", Arrow::BooleanDataType.new),
+      ]
+      schema = Arrow::Schema.new(fields)
+      columns = [
+        build_boolean_array([true]),
+        build_boolean_array([false]),
+      ]
+      record_batch = Arrow::RecordBatch.new(schema, 1, columns)
+      assert_equal(1, record_batch.n_rows)
+    end
+
+    def test_no_columns
+      fields = [
+        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
+      ]
+      schema = Arrow::Schema.new(fields)
+      message = "[record-batch][new]: " +
+        "Invalid: Number of columns did not match schema"
+      assert_raise(Arrow::Error::Invalid.new(message)) do
+        Arrow::RecordBatch.new(schema, 0, [])
+      end
+    end
   end
 
   sub_test_case("instance methods") do
     def setup
+      @visible_field = Arrow::Field.new("visible", Arrow::BooleanDataType.new)
+      @visible_values = [true, false, true, false, true]
+      @valid_field = Arrow::Field.new("valid", Arrow::BooleanDataType.new)
+      @valid_values = [false, true, false, true, false]
+
       fields = [
-        Arrow::Field.new("visible", Arrow::BooleanDataType.new),
-        Arrow::Field.new("valid", Arrow::BooleanDataType.new),
+        @visible_field,
+        @valid_field,
       ]
       schema = Arrow::Schema.new(fields)
       columns = [
-        build_boolean_array([true, false, true, false, true, false]),
-        build_boolean_array([false, true, false, true, false]),
+        build_boolean_array(@visible_values),
+        build_boolean_array(@valid_values),
       ]
-      @record_batch = Arrow::RecordBatch.new(schema, 5, columns)
+      @record_batch = Arrow::RecordBatch.new(schema,
+                                             @visible_values.size,
+                                             columns)
     end
 
     def test_equal
@@ -53,7 +74,7 @@ def test_equal
       ]
       schema = Arrow::Schema.new(fields)
       columns = [
-        build_boolean_array([true, false, true, false, true, false]),
+        build_boolean_array([true, false, true, false, true]),
         build_boolean_array([false, true, false, true, false]),
       ]
       other_record_batch = Arrow::RecordBatch.new(schema, 5, columns)
@@ -66,12 +87,28 @@ def test_schema
                    @record_batch.schema.fields.collect(&:name))
     end
 
-    def test_column
-      assert_equal(5, @record_batch.get_column(1).length)
+    sub_test_case("#column") do
+      def test_positive
+        assert_equal(build_boolean_array(@valid_values),
+                     @record_batch.get_column(1))
+      end
+
+      def test_negative
+        assert_equal(build_boolean_array(@visible_values),
+                     @record_batch.get_column(-2))
+      end
+
+      def test_positive_out_of_index
+        assert_nil(@record_batch.get_column(2))
+      end
+
+      def test_negative_out_of_index
+        assert_nil(@record_batch.get_column(-3))
+      end
     end
 
     def test_columns
-      assert_equal([6, 5],
+      assert_equal([5, 5],
                    @record_batch.columns.collect(&:length))
     end
 
@@ -94,7 +131,7 @@ def test_slice
 
     def test_to_s
       assert_equal(<<-PRETTY_PRINT, @record_batch.to_s)
-visible: [true, false, true, false, true, false]
+visible: [true, false, true, false, true]
 valid: [false, true, false, true, false]
       PRETTY_PRINT
     end


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> [GLib] Add input checks to GArrowRecordBatch
> --------------------------------------------
>
>                 Key: ARROW-1849
>                 URL: https://issues.apache.org/jira/browse/ARROW-1849
>             Project: Apache Arrow
>          Issue Type: Improvement
>          Components: GLib
>            Reporter: Kouhei Sutou
>            Assignee: Kouhei Sutou
>            Priority: Minor
>              Labels: pull-request-available
>             Fix For: 0.8.0
>
>




--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to