[ https://issues.apache.org/jira/browse/ARROW-1808?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16260263#comment-16260263 ]
ASF GitHub Bot commented on ARROW-1808: --------------------------------------- kou commented on issue #1337: ARROW-1808: [C++] Make RecordBatch, Table virtual interfaces for column access URL: https://github.com/apache/arrow/pull/1337#issuecomment-345919956 @wesm I confirmed. The test creates 0 rows record batch with empty columns. It causes the segmentation fault. The following patch fixes this: ```diff diff --git a/c_glib/test/test-file-writer.rb b/c_glib/test/test-file-writer.rb index 3de8e5cf..67aed85f 100644 --- a/c_glib/test/test-file-writer.rb +++ b/c_glib/test/test-file-writer.rb @@ -19,14 +19,18 @@ class TestFileWriter < Test::Unit::TestCase include Helper::Buildable def test_write_record_batch + data = [true] + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + tempfile = Tempfile.open("arrow-ipc-file-writer") output = Arrow::FileOutputStream.new(tempfile.path, false) begin - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - schema = Arrow::Schema.new([field]) file_writer = Arrow::RecordBatchFileWriter.new(output, schema) begin - record_batch = Arrow::RecordBatch.new(schema, 0, []) + record_batch = Arrow::RecordBatch.new(schema, + data.size, + [build_boolean_array(data)]) file_writer.write_record_batch(record_batch) ensure file_writer.close @@ -38,8 +42,12 @@ class TestFileWriter < Test::Unit::TestCase input = Arrow::MemoryMappedInputStream.new(tempfile.path) begin file_reader = Arrow::RecordBatchFileReader.new(input) - assert_equal(["enabled"], + assert_equal([field.name], file_reader.schema.fields.collect(&:name)) + assert_equal(Arrow::RecordBatch.new(schema, + data.size, + [build_boolean_array(data)]), + file_reader.read_record_batch(0)) ensure input.close end diff --git a/c_glib/test/test-gio-input-stream.rb b/c_glib/test/test-gio-input-stream.rb index a71a3704..2adf25b3 100644 --- a/c_glib/test/test-gio-input-stream.rb +++ b/c_glib/test/test-gio-input-stream.rb @@ -16,15 +16,21 @@ # under the License. class TestGIOInputStream < Test::Unit::TestCase + include Helper::Buildable + def test_reader_backend + data = [true] + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + tempfile = Tempfile.open("arrow-gio-input-stream") output = Arrow::FileOutputStream.new(tempfile.path, false) begin - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - schema = Arrow::Schema.new([field]) file_writer = Arrow::RecordBatchFileWriter.new(output, schema) begin - record_batch = Arrow::RecordBatch.new(schema, 0, []) + record_batch = Arrow::RecordBatch.new(schema, + data.size, + [build_boolean_array(data)]) file_writer.write_record_batch(record_batch) ensure file_writer.close @@ -38,8 +44,12 @@ class TestGIOInputStream < Test::Unit::TestCase input = Arrow::GIOInputStream.new(input_stream) begin file_reader = Arrow::RecordBatchFileReader.new(input) - assert_equal(["enabled"], + assert_equal([field.name], file_reader.schema.fields.collect(&:name)) + assert_equal(Arrow::RecordBatch.new(schema, + data.size, + [build_boolean_array(data)]), + file_reader.read_record_batch(0)) ensure input.close end diff --git a/c_glib/test/test-gio-output-stream.rb b/c_glib/test/test-gio-output-stream.rb index adaa8c1b..c77598ed 100644 --- a/c_glib/test/test-gio-output-stream.rb +++ b/c_glib/test/test-gio-output-stream.rb @@ -16,17 +16,23 @@ # under the License. class TestGIOOutputStream < Test::Unit::TestCase + include Helper::Buildable + def test_writer_backend + data = [true] + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + tempfile = Tempfile.open("arrow-gio-output-stream") file = Gio::File.new_for_path(tempfile.path) output_stream = file.append_to(:none) output = Arrow::GIOOutputStream.new(output_stream) begin - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - schema = Arrow::Schema.new([field]) file_writer = Arrow::RecordBatchFileWriter.new(output, schema) begin - record_batch = Arrow::RecordBatch.new(schema, 0, []) + record_batch = Arrow::RecordBatch.new(schema, + data.size, + [build_boolean_array(data)]) file_writer.write_record_batch(record_batch) ensure file_writer.close @@ -38,8 +44,12 @@ class TestGIOOutputStream < Test::Unit::TestCase input = Arrow::MemoryMappedInputStream.new(tempfile.path) begin file_reader = Arrow::RecordBatchFileReader.new(input) - assert_equal(["enabled"], + assert_equal([field.name], file_reader.schema.fields.collect(&:name)) + assert_equal(Arrow::RecordBatch.new(schema, + data.size, + [build_boolean_array(data)]), + file_reader.read_record_batch(0)) ensure input.close end diff --git a/c_glib/test/test-stream-writer.rb b/c_glib/test/test-stream-writer.rb index c3d0e149..32754e20 100644 --- a/c_glib/test/test-stream-writer.rb +++ b/c_glib/test/test-stream-writer.rb @@ -19,17 +19,19 @@ class TestStreamWriter < Test::Unit::TestCase include Helper::Buildable def test_write_record_batch + data = [true] + field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) + schema = Arrow::Schema.new([field]) + tempfile = Tempfile.open("arrow-ipc-stream-writer") output = Arrow::FileOutputStream.new(tempfile.path, false) begin - field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new) - schema = Arrow::Schema.new([field]) stream_writer = Arrow::RecordBatchStreamWriter.new(output, schema) begin columns = [ - build_boolean_array([true]), + build_boolean_array(data), ] - record_batch = Arrow::RecordBatch.new(schema, 1, columns) + record_batch = Arrow::RecordBatch.new(schema, data.size, columns) stream_writer.write_record_batch(record_batch) ensure stream_writer.close @@ -41,10 +43,12 @@ class TestStreamWriter < Test::Unit::TestCase input = Arrow::MemoryMappedInputStream.new(tempfile.path) begin stream_reader = Arrow::RecordBatchStreamReader.new(input) - assert_equal(["enabled"], + assert_equal([field.name], stream_reader.schema.fields.collect(&:name)) - assert_equal(true, - stream_reader.read_next.get_column(0).get_value(0)) + assert_equal(Arrow::RecordBatch.new(schema, + data.size, + [build_boolean_array(data)]), + stream_reader.read_next) assert_nil(stream_reader.read_next) ensure input.close ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > [C++] Make RecordBatch interface virtual to permit record batches that > lazy-materialize columns > ----------------------------------------------------------------------------------------------- > > Key: ARROW-1808 > URL: https://issues.apache.org/jira/browse/ARROW-1808 > Project: Apache Arrow > Issue Type: Improvement > Components: C++ > Reporter: Wes McKinney > Assignee: Wes McKinney > Labels: pull-request-available > Fix For: 0.8.0 > > > This should be looked at soon to prevent having to define a different virtual > interface for record batches. There are places where we are using the record > batch constructor directly, and in some third party code (like MapD), so this > might be good to get done for 0.8.0 -- This message was sent by Atlassian JIRA (v6.4.14#64029)