[
https://issues.apache.org/jira/browse/ARROW-1808?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16260263#comment-16260263
]
ASF GitHub Bot commented on ARROW-1808:
---------------------------------------
kou commented on issue #1337: ARROW-1808: [C++] Make RecordBatch, Table virtual
interfaces for column access
URL: https://github.com/apache/arrow/pull/1337#issuecomment-345919956
@wesm I confirmed. The test creates a record batch with 0 rows and no columns.
This causes a segmentation fault. The following patch fixes it:
```diff
diff --git a/c_glib/test/test-file-writer.rb
b/c_glib/test/test-file-writer.rb
index 3de8e5cf..67aed85f 100644
--- a/c_glib/test/test-file-writer.rb
+++ b/c_glib/test/test-file-writer.rb
@@ -19,14 +19,18 @@ class TestFileWriter < Test::Unit::TestCase
include Helper::Buildable
def test_write_record_batch
+ data = [true]
+ field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
+ schema = Arrow::Schema.new([field])
+
tempfile = Tempfile.open("arrow-ipc-file-writer")
output = Arrow::FileOutputStream.new(tempfile.path, false)
begin
- field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
- schema = Arrow::Schema.new([field])
file_writer = Arrow::RecordBatchFileWriter.new(output, schema)
begin
- record_batch = Arrow::RecordBatch.new(schema, 0, [])
+ record_batch = Arrow::RecordBatch.new(schema,
+ data.size,
+ [build_boolean_array(data)])
file_writer.write_record_batch(record_batch)
ensure
file_writer.close
@@ -38,8 +42,12 @@ class TestFileWriter < Test::Unit::TestCase
input = Arrow::MemoryMappedInputStream.new(tempfile.path)
begin
file_reader = Arrow::RecordBatchFileReader.new(input)
- assert_equal(["enabled"],
+ assert_equal([field.name],
file_reader.schema.fields.collect(&:name))
+ assert_equal(Arrow::RecordBatch.new(schema,
+ data.size,
+ [build_boolean_array(data)]),
+ file_reader.read_record_batch(0))
ensure
input.close
end
diff --git a/c_glib/test/test-gio-input-stream.rb
b/c_glib/test/test-gio-input-stream.rb
index a71a3704..2adf25b3 100644
--- a/c_glib/test/test-gio-input-stream.rb
+++ b/c_glib/test/test-gio-input-stream.rb
@@ -16,15 +16,21 @@
# under the License.
class TestGIOInputStream < Test::Unit::TestCase
+ include Helper::Buildable
+
def test_reader_backend
+ data = [true]
+ field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
+ schema = Arrow::Schema.new([field])
+
tempfile = Tempfile.open("arrow-gio-input-stream")
output = Arrow::FileOutputStream.new(tempfile.path, false)
begin
- field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
- schema = Arrow::Schema.new([field])
file_writer = Arrow::RecordBatchFileWriter.new(output, schema)
begin
- record_batch = Arrow::RecordBatch.new(schema, 0, [])
+ record_batch = Arrow::RecordBatch.new(schema,
+ data.size,
+ [build_boolean_array(data)])
file_writer.write_record_batch(record_batch)
ensure
file_writer.close
@@ -38,8 +44,12 @@ class TestGIOInputStream < Test::Unit::TestCase
input = Arrow::GIOInputStream.new(input_stream)
begin
file_reader = Arrow::RecordBatchFileReader.new(input)
- assert_equal(["enabled"],
+ assert_equal([field.name],
file_reader.schema.fields.collect(&:name))
+ assert_equal(Arrow::RecordBatch.new(schema,
+ data.size,
+ [build_boolean_array(data)]),
+ file_reader.read_record_batch(0))
ensure
input.close
end
diff --git a/c_glib/test/test-gio-output-stream.rb
b/c_glib/test/test-gio-output-stream.rb
index adaa8c1b..c77598ed 100644
--- a/c_glib/test/test-gio-output-stream.rb
+++ b/c_glib/test/test-gio-output-stream.rb
@@ -16,17 +16,23 @@
# under the License.
class TestGIOOutputStream < Test::Unit::TestCase
+ include Helper::Buildable
+
def test_writer_backend
+ data = [true]
+ field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
+ schema = Arrow::Schema.new([field])
+
tempfile = Tempfile.open("arrow-gio-output-stream")
file = Gio::File.new_for_path(tempfile.path)
output_stream = file.append_to(:none)
output = Arrow::GIOOutputStream.new(output_stream)
begin
- field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
- schema = Arrow::Schema.new([field])
file_writer = Arrow::RecordBatchFileWriter.new(output, schema)
begin
- record_batch = Arrow::RecordBatch.new(schema, 0, [])
+ record_batch = Arrow::RecordBatch.new(schema,
+ data.size,
+ [build_boolean_array(data)])
file_writer.write_record_batch(record_batch)
ensure
file_writer.close
@@ -38,8 +44,12 @@ class TestGIOOutputStream < Test::Unit::TestCase
input = Arrow::MemoryMappedInputStream.new(tempfile.path)
begin
file_reader = Arrow::RecordBatchFileReader.new(input)
- assert_equal(["enabled"],
+ assert_equal([field.name],
file_reader.schema.fields.collect(&:name))
+ assert_equal(Arrow::RecordBatch.new(schema,
+ data.size,
+ [build_boolean_array(data)]),
+ file_reader.read_record_batch(0))
ensure
input.close
end
diff --git a/c_glib/test/test-stream-writer.rb
b/c_glib/test/test-stream-writer.rb
index c3d0e149..32754e20 100644
--- a/c_glib/test/test-stream-writer.rb
+++ b/c_glib/test/test-stream-writer.rb
@@ -19,17 +19,19 @@ class TestStreamWriter < Test::Unit::TestCase
include Helper::Buildable
def test_write_record_batch
+ data = [true]
+ field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
+ schema = Arrow::Schema.new([field])
+
tempfile = Tempfile.open("arrow-ipc-stream-writer")
output = Arrow::FileOutputStream.new(tempfile.path, false)
begin
- field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
- schema = Arrow::Schema.new([field])
stream_writer = Arrow::RecordBatchStreamWriter.new(output, schema)
begin
columns = [
- build_boolean_array([true]),
+ build_boolean_array(data),
]
- record_batch = Arrow::RecordBatch.new(schema, 1, columns)
+ record_batch = Arrow::RecordBatch.new(schema, data.size, columns)
stream_writer.write_record_batch(record_batch)
ensure
stream_writer.close
@@ -41,10 +43,12 @@ class TestStreamWriter < Test::Unit::TestCase
input = Arrow::MemoryMappedInputStream.new(tempfile.path)
begin
stream_reader = Arrow::RecordBatchStreamReader.new(input)
- assert_equal(["enabled"],
+ assert_equal([field.name],
stream_reader.schema.fields.collect(&:name))
- assert_equal(true,
- stream_reader.read_next.get_column(0).get_value(0))
+ assert_equal(Arrow::RecordBatch.new(schema,
+ data.size,
+ [build_boolean_array(data)]),
+ stream_reader.read_next)
assert_nil(stream_reader.read_next)
ensure
input.close
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> [C++] Make RecordBatch interface virtual to permit record batches that
> lazy-materialize columns
> -----------------------------------------------------------------------------------------------
>
> Key: ARROW-1808
> URL: https://issues.apache.org/jira/browse/ARROW-1808
> Project: Apache Arrow
> Issue Type: Improvement
> Components: C++
> Reporter: Wes McKinney
> Assignee: Wes McKinney
> Labels: pull-request-available
> Fix For: 0.8.0
>
>
> This should be looked at soon to prevent having to define a different virtual
> interface for record batches. There are places where we are using the record
> batch constructor directly, and in some third party code (like MapD), so this
> might be good to get done for 0.8.0
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)