This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new a7b359b ARROW-9278: [C++][Python] Remove validity bitmap from Union
types, update IPC read/write and integration tests
a7b359b is described below
commit a7b359b59099bcfc64938132a1823c138589be3c
Author: Wes McKinney <[email protected]>
AuthorDate: Thu Jul 2 18:57:05 2020 -0500
ARROW-9278: [C++][Python] Remove validity bitmap from Union types, update
IPC read/write and integration tests
I am using the same `DataTypeLayout::AlwaysNull()` strategy for
`ArrayData::buffers[0]` as with NullType and, as with NullType, no validity
buffer is sent or received in the IPC paths.
There are some related changes here that are a minor API breakage around
the behavior of `AppendNull` and `AppendNulls` for `StructBuilder` and the
union builder classes. The issue is that these functions really should take
responsibility for maintaining the internal consistency of the child builders
through null appends. I deleted some code in several places where this detail
leaked outside of these functions. While there is some risk of breaking third
party code (if they are using t [...]
Assuming the Union and Metadata V5 votes carry on the mailing list, we will
want to add backwards compatibility code for reading V4 metadata, accepting
unions that do not have any top-level nulls but rejecting ones that do.
Closes #7598 from wesm/ARROW-9278
Authored-by: Wes McKinney <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
---
c_glib/test/test-dense-union-array.rb | 16 +++---
c_glib/test/test-sparse-union-array.rb | 4 +-
cpp/src/arrow/array/array_base.h | 7 ++-
cpp/src/arrow/array/array_nested.cc | 46 ++++++++-------
cpp/src/arrow/array/array_nested.h | 12 ++--
cpp/src/arrow/array/array_struct_test.cc | 10 ----
cpp/src/arrow/array/array_test.cc | 39 ++++++++++++-
cpp/src/arrow/array/array_union_test.cc | 12 ++--
cpp/src/arrow/array/array_view_test.cc | 17 ------
cpp/src/arrow/array/builder_base.h | 2 +-
cpp/src/arrow/array/builder_nested.cc | 3 +
cpp/src/arrow/array/builder_nested.h | 11 +++-
cpp/src/arrow/array/builder_union.cc | 10 ++--
cpp/src/arrow/array/builder_union.h | 53 +++++++++++------
cpp/src/arrow/array/data.cc | 23 +++-----
cpp/src/arrow/array/diff.cc | 6 +-
cpp/src/arrow/array/util.cc | 29 ++++++++--
cpp/src/arrow/ipc/json_simple.cc | 66 +++++-----------------
cpp/src/arrow/ipc/json_simple_test.cc | 25 ++++----
cpp/src/arrow/ipc/reader.cc | 44 +++++++++------
cpp/src/arrow/ipc/test_common.cc | 24 +++-----
cpp/src/arrow/ipc/writer.cc | 4 +-
cpp/src/arrow/python/deserialize.cc | 21 ++++---
cpp/src/arrow/python/python_to_arrow.cc | 10 +---
cpp/src/arrow/python/serialize.cc | 4 +-
cpp/src/arrow/python/serialize.h | 1 +
cpp/src/arrow/testing/json_internal.cc | 20 ++-----
cpp/src/arrow/type.cc | 4 +-
cpp/src/arrow/type.h | 11 ++++
dev/archery/archery/integration/datagen.py | 42 +++++---------
ruby/red-arrow/lib/arrow/struct-array-builder.rb | 3 -
.../test/raw-records/test-dense-union-array.rb | 35 +-----------
.../test/raw-records/test-sparse-union-array.rb | 34 +----------
.../test/values/test-dense-union-array.rb | 35 +-----------
.../test/values/test-sparse-union-array.rb | 34 +----------
35 files changed, 294 insertions(+), 423 deletions(-)
diff --git a/c_glib/test/test-dense-union-array.rb
b/c_glib/test/test-dense-union-array.rb
index aec12b4..ecd17d5 100644
--- a/c_glib/test/test-dense-union-array.rb
+++ b/c_glib/test/test-dense-union-array.rb
@@ -21,10 +21,10 @@ class TestDenseUnionArray < Test::Unit::TestCase
sub_test_case(".new") do
sub_test_case("default") do
def setup
- type_ids = build_int8_array([0, 1, nil, 1, 1])
- value_offsets = build_int32_array([0, 0, 0, 1, 2])
+ type_ids = build_int8_array([0, 1, 0, 1, 1])
+ value_offsets = build_int32_array([0, 0, 1, 1, 2])
fields = [
- build_int16_array([1]),
+ build_int16_array([1, nil]),
build_string_array(["a", "b", "c"]),
]
@array = Arrow::DenseUnionArray.new(type_ids, value_offsets, fields)
@@ -41,7 +41,7 @@ class TestDenseUnionArray < Test::Unit::TestCase
def test_field
assert_equal([
- build_int16_array([1]),
+ build_int16_array([1, nil]),
build_string_array(["a", "b", "c"]),
],
[
@@ -59,10 +59,10 @@ class TestDenseUnionArray < Test::Unit::TestCase
]
type_codes = [11, 13]
@data_type = Arrow::DenseUnionDataType.new(data_type_fields,
type_codes)
- type_ids = build_int8_array([11, 13, nil, 13, 13])
- value_offsets = build_int32_array([0, 0, 0, 1, 2])
+ type_ids = build_int8_array([11, 13, 11, 13, 13])
+ value_offsets = build_int32_array([0, 0, 1, 1, 2])
fields = [
- build_int16_array([1]),
+ build_int16_array([1, nil]),
build_string_array(["a", "b", "c"])
]
@array = Arrow::DenseUnionArray.new(@data_type, type_ids,
value_offsets, fields)
@@ -75,7 +75,7 @@ class TestDenseUnionArray < Test::Unit::TestCase
def test_field
assert_equal([
- build_int16_array([1]),
+ build_int16_array([1, nil]),
build_string_array(["a", "b", "c"]),
],
[
diff --git a/c_glib/test/test-sparse-union-array.rb
b/c_glib/test/test-sparse-union-array.rb
index 62b7b3d..1132ccb 100644
--- a/c_glib/test/test-sparse-union-array.rb
+++ b/c_glib/test/test-sparse-union-array.rb
@@ -21,7 +21,7 @@ class TestSparseUnionArray < Test::Unit::TestCase
sub_test_case(".new") do
sub_test_case("default") do
def setup
- type_ids = build_int8_array([0, 1, nil, 1, 0])
+ type_ids = build_int8_array([0, 1, 0, 1, 0])
fields = [
build_int16_array([1, nil, nil, nil, 5]),
build_string_array([nil, "b", nil, "d", nil]),
@@ -58,7 +58,7 @@ class TestSparseUnionArray < Test::Unit::TestCase
]
type_codes = [11, 13]
@data_type = Arrow::SparseUnionDataType.new(data_type_fields,
type_codes)
- type_ids = build_int8_array([11, 13, nil, 13, 11])
+ type_ids = build_int8_array([11, 13, 11, 13, 11])
fields = [
build_int16_array([1, nil, nil, nil, 5]),
build_string_array([nil, "b", nil, "d", nil]),
diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h
index af8a881..7aba6c9 100644
--- a/cpp/src/arrow/array/array_base.h
+++ b/cpp/src/arrow/array/array_base.h
@@ -86,15 +86,16 @@ class ARROW_EXPORT Array {
std::shared_ptr<DataType> type() const { return data_->type; }
Type::type type_id() const { return data_->type->id(); }
- /// Buffer for the null bitmap.
+ /// Buffer for the validity (null) bitmap, if any. Note that Union types
+ /// never have a null bitmap.
///
- /// Note that for `null_count == 0`, this can be null.
+ /// Note that for `null_count == 0` or for null type, this will be null.
/// This buffer does not account for any slice offset
std::shared_ptr<Buffer> null_bitmap() const { return data_->buffers[0]; }
/// Raw pointer to the null bitmap.
///
- /// Note that for `null_count == 0`, this can be null.
+ /// Note that for `null_count == 0` or for null type, this will be null.
/// This buffer does not account for any slice offset
const uint8_t* null_bitmap_data() const { return null_bitmap_data_; }
diff --git a/cpp/src/arrow/array/array_nested.cc
b/cpp/src/arrow/array/array_nested.cc
index 311f43a..e8d4ed9 100644
--- a/cpp/src/arrow/array/array_nested.cc
+++ b/cpp/src/arrow/array/array_nested.cc
@@ -610,9 +610,11 @@ void UnionArray::SetData(std::shared_ptr<ArrayData> data) {
void SparseUnionArray::SetData(std::shared_ptr<ArrayData> data) {
this->UnionArray::SetData(std::move(data));
-
ARROW_CHECK_EQ(data_->type->id(), Type::SPARSE_UNION);
ARROW_CHECK_EQ(data_->buffers.size(), 2);
+
+ // No validity bitmap
+ ARROW_CHECK_EQ(data_->buffers[0], nullptr);
}
void DenseUnionArray::SetData(const std::shared_ptr<ArrayData>& data) {
@@ -620,6 +622,10 @@ void DenseUnionArray::SetData(const
std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data_->type->id(), Type::DENSE_UNION);
ARROW_CHECK_EQ(data_->buffers.size(), 3);
+
+ // No validity bitmap
+ ARROW_CHECK_EQ(data_->buffers[0], nullptr);
+
auto value_offsets = data_->buffers[2];
raw_value_offsets_ = value_offsets == nullptr
? nullptr
@@ -632,12 +638,10 @@
SparseUnionArray::SparseUnionArray(std::shared_ptr<ArrayData> data) {
SparseUnionArray::SparseUnionArray(std::shared_ptr<DataType> type, int64_t
length,
ArrayVector children,
- std::shared_ptr<Buffer> type_codes,
- std::shared_ptr<Buffer> null_bitmap,
- int64_t null_count, int64_t offset) {
- auto internal_data = ArrayData::Make(
- std::move(type), length,
- BufferVector{std::move(null_bitmap), std::move(type_codes)}, null_count,
offset);
+ std::shared_ptr<Buffer> type_codes, int64_t
offset) {
+ auto internal_data = ArrayData::Make(std::move(type), length,
+ BufferVector{nullptr,
std::move(type_codes)},
+ /*null_count=*/0, offset);
for (const auto& child : children) {
internal_data->child_data.push_back(child->data());
}
@@ -650,13 +654,11 @@ DenseUnionArray::DenseUnionArray(const
std::shared_ptr<ArrayData>& data) {
DenseUnionArray::DenseUnionArray(std::shared_ptr<DataType> type, int64_t
length,
ArrayVector children, std::shared_ptr<Buffer>
type_ids,
- std::shared_ptr<Buffer> value_offsets,
- std::shared_ptr<Buffer> null_bitmap, int64_t
null_count,
- int64_t offset) {
+ std::shared_ptr<Buffer> value_offsets,
int64_t offset) {
auto internal_data = ArrayData::Make(
std::move(type), length,
- BufferVector{std::move(null_bitmap), std::move(type_ids),
std::move(value_offsets)},
- null_count, offset);
+ BufferVector{nullptr, std::move(type_ids), std::move(value_offsets)},
+ /*null_count=*/0, offset);
for (const auto& child : children) {
internal_data->child_data.push_back(child->data());
}
@@ -678,8 +680,12 @@ Result<std::shared_ptr<Array>> DenseUnionArray::Make(
return Status::TypeError("UnionArray type_ids must be signed int8");
}
+ if (type_ids.null_count() != 0) {
+ return Status::Invalid("Union type ids may not have nulls");
+ }
+
if (value_offsets.null_count() != 0) {
- return Status::Invalid("Make does not allow NAs in value_offsets");
+ return Status::Invalid("Make does not allow nulls in value_offsets");
}
if (field_names.size() > 0 && field_names.size() != children.size()) {
@@ -690,14 +696,13 @@ Result<std::shared_ptr<Array>> DenseUnionArray::Make(
return Status::Invalid("type_codes must have the same length as children");
}
- BufferVector buffers = {type_ids.null_bitmap(),
- checked_cast<const Int8Array&>(type_ids).values(),
+ BufferVector buffers = {nullptr, checked_cast<const
Int8Array&>(type_ids).values(),
checked_cast<const
Int32Array&>(value_offsets).values()};
auto union_type = dense_union(children, std::move(field_names),
std::move(type_codes));
auto internal_data =
ArrayData::Make(std::move(union_type), type_ids.length(),
std::move(buffers),
- type_ids.null_count(), type_ids.offset());
+ /*null_count=*/0, type_ids.offset());
for (const auto& child : children) {
internal_data->child_data.push_back(child->data());
}
@@ -711,6 +716,10 @@ Result<std::shared_ptr<Array>> SparseUnionArray::Make(
return Status::TypeError("UnionArray type_ids must be signed int8");
}
+ if (type_ids.null_count() != 0) {
+ return Status::Invalid("Union type ids may not have nulls");
+ }
+
if (field_names.size() > 0 && field_names.size() != children.size()) {
return Status::Invalid("field_names must have the same length as
children");
}
@@ -719,12 +728,11 @@ Result<std::shared_ptr<Array>> SparseUnionArray::Make(
return Status::Invalid("type_codes must have the same length as children");
}
- BufferVector buffers = {type_ids.null_bitmap(),
- checked_cast<const Int8Array&>(type_ids).values()};
+ BufferVector buffers = {nullptr, checked_cast<const
Int8Array&>(type_ids).values()};
auto union_type = sparse_union(children, std::move(field_names),
std::move(type_codes));
auto internal_data =
ArrayData::Make(std::move(union_type), type_ids.length(),
std::move(buffers),
- type_ids.null_count(), type_ids.offset());
+ /*null_count=*/0, type_ids.offset());
for (const auto& child : children) {
internal_data->child_data.push_back(child->data());
if (child->length() != type_ids.length()) {
diff --git a/cpp/src/arrow/array/array_nested.h
b/cpp/src/arrow/array/array_nested.h
index b4a5229..e37c34b 100644
--- a/cpp/src/arrow/array/array_nested.h
+++ b/cpp/src/arrow/array/array_nested.h
@@ -401,9 +401,7 @@ class ARROW_EXPORT SparseUnionArray : public UnionArray {
explicit SparseUnionArray(std::shared_ptr<ArrayData> data);
SparseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector
children,
- std::shared_ptr<Buffer> type_ids,
- std::shared_ptr<Buffer> null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+ std::shared_ptr<Buffer> type_ids, int64_t offset = 0);
/// \brief Construct SparseUnionArray from type_ids and children
///
@@ -438,7 +436,9 @@ class ARROW_EXPORT SparseUnionArray : public UnionArray {
void SetData(std::shared_ptr<ArrayData> data);
};
-/// Concrete Array class for dense union data
+/// \brief Concrete Array class for dense union data
+///
+/// Note that union types do not have a validity bitmap
class ARROW_EXPORT DenseUnionArray : public UnionArray {
public:
using TypeClass = DenseUnionType;
@@ -447,9 +447,7 @@ class ARROW_EXPORT DenseUnionArray : public UnionArray {
DenseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector
children,
std::shared_ptr<Buffer> type_ids,
- std::shared_ptr<Buffer> value_offsets = NULLPTR,
- std::shared_ptr<Buffer> null_bitmap = NULLPTR,
- int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+ std::shared_ptr<Buffer> value_offsets = NULLPTR, int64_t
offset = 0);
/// \brief Construct DenseUnionArray from type_ids, value_offsets, and
children
///
diff --git a/cpp/src/arrow/array/array_struct_test.cc
b/cpp/src/arrow/array/array_struct_test.cc
index e4f50b1..0afadcf 100644
--- a/cpp/src/arrow/array/array_struct_test.cc
+++ b/cpp/src/arrow/array/array_struct_test.cc
@@ -256,16 +256,6 @@ TEST_F(TestStructBuilder, TestAppendNull) {
ASSERT_OK(builder_->AppendNull());
ASSERT_EQ(2, static_cast<int>(builder_->num_fields()));
- ListBuilder* list_vb =
checked_cast<ListBuilder*>(builder_->field_builder(0));
- ASSERT_OK(list_vb->AppendNull());
- ASSERT_OK(list_vb->AppendNull());
- ASSERT_EQ(2, list_vb->length());
-
- Int32Builder* int_vb =
checked_cast<Int32Builder*>(builder_->field_builder(1));
- ASSERT_OK(int_vb->AppendNull());
- ASSERT_OK(int_vb->AppendNull());
- ASSERT_EQ(2, int_vb->length());
-
Done();
ASSERT_OK(result_->ValidateFull());
diff --git a/cpp/src/arrow/array/array_test.cc
b/cpp/src/arrow/array/array_test.cc
index 0f53d1c..e4090ee 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -298,8 +298,6 @@ TEST_F(TestArray, TestMakeArrayOfNull) {
fixed_size_list(int64(), 4),
dictionary(int32(), utf8()),
struct_({field("a", utf8()), field("b", int32())}),
- sparse_union({field("a", utf8()), field("b", int32())}, {0, 1}),
- dense_union({field("a", utf8()), field("b", int32())}, {0, 1}),
// clang-format on
};
@@ -313,6 +311,43 @@ TEST_F(TestArray, TestMakeArrayOfNull) {
}
}
+TEST_F(TestArray, TestMakeArrayOfNullUnion) {
+ // Unions need special checking -- the top level null count is 0 (per
+ // ARROW-9222) so we check the first child to make sure it contains all nulls
+ // and check that the type_ids all point to the first child
+ const int64_t union_length = 10;
+ auto s_union_ty = sparse_union({field("a", utf8()), field("b", int32())},
{0, 1});
+ ASSERT_OK_AND_ASSIGN(auto s_union_nulls, MakeArrayOfNull(s_union_ty,
union_length));
+ ASSERT_EQ(s_union_nulls->null_count(), 0);
+ {
+ const auto& typed_union = checked_cast<const
SparseUnionArray&>(*s_union_nulls);
+ ASSERT_EQ(typed_union.field(0)->null_count(), union_length);
+
+ // Check type codes are all 0
+ for (int i = 0; i < union_length; ++i) {
+ ASSERT_EQ(typed_union.raw_type_codes()[i], 0);
+ }
+ }
+
+ auto d_union_ty = dense_union({field("a", utf8()), field("b", int32())}, {0,
1});
+ ASSERT_OK_AND_ASSIGN(auto d_union_nulls, MakeArrayOfNull(d_union_ty,
union_length));
+ ASSERT_EQ(d_union_nulls->null_count(), 0);
+ {
+ const auto& typed_union = checked_cast<const
DenseUnionArray&>(*d_union_nulls);
+
+ // Child field has length 1 which is a null element
+ ASSERT_EQ(typed_union.field(0)->length(), 1);
+ ASSERT_EQ(typed_union.field(0)->null_count(), 1);
+
+ // Check type codes are all 0 and the offsets point to the first element of
+ // the first child
+ for (int i = 0; i < union_length; ++i) {
+ ASSERT_EQ(typed_union.raw_type_codes()[i], 0);
+ ASSERT_EQ(typed_union.raw_value_offsets()[i], 0);
+ }
+ }
+}
+
TEST_F(TestArray, TestMakeArrayFromScalar) {
auto hello = Buffer::FromString("hello");
ScalarVector scalars{std::make_shared<BooleanScalar>(false),
diff --git a/cpp/src/arrow/array/array_union_test.cc
b/cpp/src/arrow/array/array_union_test.cc
index b346efe..a32b8b8 100644
--- a/cpp/src/arrow/array/array_union_test.cc
+++ b/cpp/src/arrow/array/array_union_test.cc
@@ -62,8 +62,8 @@ TEST(TestUnionArray, TestSliceEquals) {
TestInitialized(*array);
};
+ CheckUnion(batch->column(0));
CheckUnion(batch->column(1));
- CheckUnion(batch->column(2));
}
TEST(TestSparseUnionArray, Validate) {
@@ -75,25 +75,25 @@ TEST(TestSparseUnionArray, Validate) {
auto arr = std::make_shared<SparseUnionArray>(type, 2, children, type_ids);
ASSERT_OK(arr->ValidateFull());
- arr = std::make_shared<SparseUnionArray>(type, 1, children, type_ids,
nullptr, 0,
+ arr = std::make_shared<SparseUnionArray>(type, 1, children, type_ids,
/*offset=*/1);
ASSERT_OK(arr->ValidateFull());
- arr = std::make_shared<SparseUnionArray>(type, 0, children, type_ids,
nullptr, 0,
+ arr = std::make_shared<SparseUnionArray>(type, 0, children, type_ids,
/*offset=*/2);
ASSERT_OK(arr->ValidateFull());
// Length + offset < child length, but it's ok
- arr = std::make_shared<SparseUnionArray>(type, 1, children, type_ids,
nullptr, 0,
+ arr = std::make_shared<SparseUnionArray>(type, 1, children, type_ids,
/*offset=*/0);
ASSERT_OK(arr->ValidateFull());
// Length + offset > child length
- arr = std::make_shared<SparseUnionArray>(type, 1, children, type_ids,
nullptr, 0,
+ arr = std::make_shared<SparseUnionArray>(type, 1, children, type_ids,
/*offset=*/2);
ASSERT_RAISES(Invalid, arr->ValidateFull());
// Offset > child length
- arr = std::make_shared<SparseUnionArray>(type, 0, children, type_ids,
nullptr, 0,
+ arr = std::make_shared<SparseUnionArray>(type, 0, children, type_ids,
/*offset=*/3);
ASSERT_RAISES(Invalid, arr->ValidateFull());
}
diff --git a/cpp/src/arrow/array/array_view_test.cc
b/cpp/src/arrow/array/array_view_test.cc
index dc61a0d..3aac62d 100644
--- a/cpp/src/arrow/array/array_view_test.cc
+++ b/cpp/src/arrow/array/array_view_test.cc
@@ -340,23 +340,6 @@ TEST(TestArrayView, SparseUnionAsStruct) {
auto expected = ArrayFromJSON(ty1, "[[0, 0, 0], [0, 65535, 1.5], [1, 42,
-2.5]]");
CheckView(arr, expected);
CheckView(expected, arr);
-
- // With nulls
- indices = ArrayFromJSON(int8(), "[null, 0, 1]");
- ASSERT_OK_AND_ASSIGN(arr, SparseUnionArray::Make(*indices, {child1,
child2}));
- ASSERT_OK(arr->ValidateFull());
- expected = ArrayFromJSON(ty1, "[null, [0, 65535, 1.5], [1, 42, -2.5]]");
- CheckView(arr, expected);
- // CheckView(expected, arr); // XXX currently fails
-
- // With nested nulls
- child1 = ArrayFromJSON(int16(), "[0, -1, null]");
- child2 = ArrayFromJSON(int32(), "[0, null, -1071644672]");
- ASSERT_OK_AND_ASSIGN(arr, SparseUnionArray::Make(*indices, {child1,
child2}));
- ASSERT_OK(arr->ValidateFull());
- expected = ArrayFromJSON(ty1, "[null, [0, 65535, null], [1, null, -2.5]]");
- CheckView(arr, expected);
- // CheckView(expected, arr); // XXX currently fails
}
TEST(TestArrayView, DecimalRoundTrip) {
diff --git a/cpp/src/arrow/array/builder_base.h
b/cpp/src/arrow/array/builder_base.h
index 1054255..8d327b7 100644
--- a/cpp/src/arrow/array/builder_base.h
+++ b/cpp/src/arrow/array/builder_base.h
@@ -58,7 +58,7 @@ class ARROW_EXPORT ArrayBuilder {
int num_children() const { return static_cast<int>(children_.size()); }
- int64_t length() const { return length_; }
+ virtual int64_t length() const { return length_; }
int64_t null_count() const { return null_count_; }
int64_t capacity() const { return capacity_; }
diff --git a/cpp/src/arrow/array/builder_nested.cc
b/cpp/src/arrow/array/builder_nested.cc
index 1e46ba4..b8af62f 100644
--- a/cpp/src/arrow/array/builder_nested.cc
+++ b/cpp/src/arrow/array/builder_nested.cc
@@ -208,6 +208,9 @@ void StructBuilder::Reset() {
}
Status StructBuilder::AppendNulls(int64_t length) {
+ for (const auto& field : children_) {
+ RETURN_NOT_OK(field->AppendNulls(length));
+ }
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(length, false);
return Status::OK();
diff --git a/cpp/src/arrow/array/builder_nested.h
b/cpp/src/arrow/array/builder_nested.h
index 653d5a9..cd6fadf 100644
--- a/cpp/src/arrow/array/builder_nested.h
+++ b/cpp/src/arrow/array/builder_nested.h
@@ -395,8 +395,17 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder {
return Status::OK();
}
- Status AppendNull() final { return Append(false); }
+ /// \brief Append a null value. Automatically appends a null to each child
+ /// builder.
+ Status AppendNull() final {
+ for (const auto& field : children_) {
+ ARROW_RETURN_NOT_OK(field->AppendNull());
+ }
+ return Append(false);
+ }
+ /// \brief Append multiple null values. Automatically appends nulls to each
+ /// child builder.
Status AppendNulls(int64_t length) final;
void Reset() override;
diff --git a/cpp/src/arrow/array/builder_union.cc
b/cpp/src/arrow/array/builder_union.cc
index a5e4c13..90d4f42 100644
--- a/cpp/src/arrow/array/builder_union.cc
+++ b/cpp/src/arrow/array/builder_union.cc
@@ -30,8 +30,9 @@ using internal::checked_cast;
using internal::checked_pointer_cast;
Status BasicUnionBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
- std::shared_ptr<Buffer> types, null_bitmap;
- RETURN_NOT_OK(null_bitmap_builder_.Finish(&null_bitmap));
+ int64_t length = types_builder_.length();
+
+ std::shared_ptr<Buffer> types;
RETURN_NOT_OK(types_builder_.Finish(&types));
std::vector<std::shared_ptr<ArrayData>> child_data(children_.size());
@@ -39,7 +40,7 @@ Status
BasicUnionBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
RETURN_NOT_OK(children_[i]->FinishInternal(&child_data[i]));
}
- *out = ArrayData::Make(type(), length(), {null_bitmap, types}, null_count_);
+ *out = ArrayData::Make(type(), length, {nullptr, types}, /*null_count=*/0);
(*out)->child_data = std::move(child_data);
return Status::OK();
}
@@ -79,13 +80,10 @@ BasicUnionBuilder::BasicUnionBuilder(
int8_t BasicUnionBuilder::AppendChild(const std::shared_ptr<ArrayBuilder>&
new_child,
const std::string& field_name) {
children_.push_back(new_child);
-
auto new_type_id = NextTypeId();
type_id_to_children_[new_type_id] = new_child.get();
-
child_fields_.push_back(field(field_name, nullptr));
-
type_codes_.push_back(static_cast<int8_t>(new_type_id));
return new_type_id;
diff --git a/cpp/src/arrow/array/builder_union.h
b/cpp/src/arrow/array/builder_union.h
index e337673..1ccc7ef 100644
--- a/cpp/src/arrow/array/builder_union.h
+++ b/cpp/src/arrow/array/builder_union.h
@@ -33,6 +33,10 @@
namespace arrow {
+/// \brief Base class for union array builders.
+///
+/// Note that while we subclass ArrayBuilder, as union types do not have a
+/// validity bitmap, the bitmap builder member of ArrayBuilder is not used.
class ARROW_EXPORT BasicUnionBuilder : public ArrayBuilder {
public:
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
@@ -56,6 +60,8 @@ class ARROW_EXPORT BasicUnionBuilder : public ArrayBuilder {
std::shared_ptr<DataType> type() const override;
+ int64_t length() const override { return types_builder_.length(); }
+
protected:
BasicUnionBuilder(MemoryPool* pool,
const std::vector<std::shared_ptr<ArrayBuilder>>& children,
@@ -92,15 +98,23 @@ class ARROW_EXPORT DenseUnionBuilder : public
BasicUnionBuilder {
: BasicUnionBuilder(pool, children, type), offsets_builder_(pool) {}
Status AppendNull() final {
- ARROW_RETURN_NOT_OK(types_builder_.Append(0));
- ARROW_RETURN_NOT_OK(offsets_builder_.Append(0));
- return AppendToBitmap(false);
+ const int8_t first_child_code = type_codes_[0];
+ ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(first_child_code));
+ ARROW_RETURN_NOT_OK(
+
offsets_builder_.Append(static_cast<int32_t>(child_builder->length())));
+ // Append a null arbitrarily to the first child
+ return child_builder->AppendNull();
}
Status AppendNulls(int64_t length) final {
- ARROW_RETURN_NOT_OK(types_builder_.Append(length, 0));
- ARROW_RETURN_NOT_OK(offsets_builder_.Append(length, 0));
- return AppendToBitmap(length, false);
+ const int8_t first_child_code = type_codes_[0];
+ ArrayBuilder* child_builder = type_id_to_children_[first_child_code];
+ ARROW_RETURN_NOT_OK(types_builder_.Append(length, first_child_code));
+ ARROW_RETURN_NOT_OK(
+ offsets_builder_.Append(length,
static_cast<int32_t>(child_builder->length())));
+ // Append just a single null to the first child
+ return child_builder->AppendNull();
}
/// \brief Append an element to the UnionArray. This must be followed
@@ -118,8 +132,7 @@ class ARROW_EXPORT DenseUnionBuilder : public
BasicUnionBuilder {
"child");
}
auto offset =
static_cast<int32_t>(type_id_to_children_[next_type]->length());
- ARROW_RETURN_NOT_OK(offsets_builder_.Append(offset));
- return AppendToBitmap(true);
+ return offsets_builder_.Append(offset);
}
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
@@ -146,14 +159,25 @@ class ARROW_EXPORT SparseUnionBuilder : public
BasicUnionBuilder {
const std::shared_ptr<DataType>& type)
: BasicUnionBuilder(pool, children, type) {}
+ /// \brief Append a null value. A null is added automatically to all the
+ /// children, and the type id in the slot is the first child's type code
Status AppendNull() final {
- ARROW_RETURN_NOT_OK(types_builder_.Append(0));
- return AppendToBitmap(false);
+ ARROW_RETURN_NOT_OK(types_builder_.Append(type_codes_[0]));
+ for (int8_t code : type_codes_) {
+ ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendNull());
+ }
+ return Status::OK();
}
+ /// \brief Append multiple null values. Nulls will be automatically appended
+ /// to all the children, and the type ids will all be the first child's type code.
Status AppendNulls(int64_t length) final {
- ARROW_RETURN_NOT_OK(types_builder_.Append(length, 0));
- return AppendToBitmap(length, false);
+ ARROW_RETURN_NOT_OK(types_builder_.Append(length, type_codes_[0]));
+ // Append nulls to children
+ for (int8_t code : type_codes_) {
+ ARROW_RETURN_NOT_OK(type_id_to_children_[code]->AppendNulls(length));
+ }
+ return Status::OK();
}
/// \brief Append an element to the UnionArray. This must be followed
@@ -163,10 +187,7 @@ class ARROW_EXPORT SparseUnionBuilder : public
BasicUnionBuilder {
///
/// The corresponding child builder must be appended to independently after
this method
/// is called, and all other child builders must have null appended
- Status Append(int8_t next_type) {
- ARROW_RETURN_NOT_OK(types_builder_.Append(next_type));
- return AppendToBitmap(true);
- }
+ Status Append(int8_t next_type) { return types_builder_.Append(next_type); }
};
} // namespace arrow
diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc
index f9558a4..c20096b 100644
--- a/cpp/src/arrow/array/data.cc
+++ b/cpp/src/arrow/array/data.cc
@@ -36,9 +36,10 @@ namespace arrow {
using internal::CountSetBits;
-static inline void AdjustNonNullable(std::vector<std::shared_ptr<Buffer>>*
buffers,
+static inline void AdjustNonNullable(Type::type type_id,
+ std::vector<std::shared_ptr<Buffer>>*
buffers,
int64_t* null_count) {
- if (buffers->size() > 0) {
+ if (internal::HasValidityBitmap(type_id)) {
if (*null_count == 0) {
// In case there are no nulls, don't keep an allocated null bitmap around
(*buffers)[0] = nullptr;
@@ -46,6 +47,8 @@ static inline void
AdjustNonNullable(std::vector<std::shared_ptr<Buffer>>* buffe
// Conversely, if no null bitmap is provided, set the null count to 0
*null_count = 0;
}
+ } else {
+ *null_count = 0;
}
}
@@ -53,7 +56,7 @@ std::shared_ptr<ArrayData> ArrayData::Make(const
std::shared_ptr<DataType>& type
int64_t length,
std::vector<std::shared_ptr<Buffer>> buffers,
int64_t null_count, int64_t offset)
{
- AdjustNonNullable(&buffers, &null_count);
+ AdjustNonNullable(type->id(), &buffers, &null_count);
return std::make_shared<ArrayData>(type, length, std::move(buffers),
null_count,
offset);
}
@@ -63,7 +66,7 @@ std::shared_ptr<ArrayData> ArrayData::Make(
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data, int64_t null_count,
int64_t offset) {
- AdjustNonNullable(&buffers, &null_count);
+ AdjustNonNullable(type->id(), &buffers, &null_count);
return std::make_shared<ArrayData>(type, length, std::move(buffers),
std::move(child_data), null_count,
offset);
}
@@ -73,7 +76,7 @@ std::shared_ptr<ArrayData> ArrayData::Make(
std::vector<std::shared_ptr<Buffer>> buffers,
std::vector<std::shared_ptr<ArrayData>> child_data,
std::shared_ptr<ArrayData> dictionary, int64_t null_count, int64_t offset)
{
- AdjustNonNullable(&buffers, &null_count);
+ AdjustNonNullable(type->id(), &buffers, &null_count);
auto data = std::make_shared<ArrayData>(type, length, std::move(buffers),
std::move(child_data), null_count,
offset);
data->dictionary = std::move(dictionary);
@@ -217,18 +220,10 @@ struct ViewDataImpl {
// No type has a purely empty layout
DCHECK_GT(out_layout.buffers.size(), 0);
- if (out_layout.buffers[0].kind == DataTypeLayout::ALWAYS_NULL) {
- // Assuming null type or equivalent.
- DCHECK_EQ(out_layout.buffers.size(), 1);
- *out = ArrayData::Make(out_type, out_length, {nullptr}, out_length);
- return Status::OK();
- }
-
std::vector<std::shared_ptr<Buffer>> out_buffers;
// Process null bitmap
- DCHECK_EQ(out_layout.buffers[0].kind, DataTypeLayout::BITMAP);
- if (in_buffer_idx == 0) {
+ if (in_buffer_idx == 0 && out_layout.buffers[0].kind ==
DataTypeLayout::BITMAP) {
// Copy input null bitmap
RETURN_NOT_OK(CheckInputAvailable());
const auto& in_data_item = in_data[in_layout_idx];
diff --git a/cpp/src/arrow/array/diff.cc b/cpp/src/arrow/array/diff.cc
index 6b98feb..a94ca17 100644
--- a/cpp/src/arrow/array/diff.cc
+++ b/cpp/src/arrow/array/diff.cc
@@ -556,13 +556,13 @@ class MakeFormatterImpl {
void DoFormat(const UnionArray& array, int64_t index, int64_t
child_index,
std::ostream* os) {
auto type_code = array.raw_type_codes()[index];
- const auto& child = *array.field(array.child_id(index));
+ auto child = array.field(array.child_id(index));
*os << "{" << static_cast<int16_t>(type_code) << ": ";
- if (child.IsNull(child_index)) {
+ if (child->IsNull(child_index)) {
*os << "null";
} else {
- field_formatters_[type_code](child, child_index, os);
+ field_formatters_[type_code](*child, child_index, os);
}
*os << "}";
}
diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc
index 915482b..6b2a22a 100644
--- a/cpp/src/arrow/array/util.cc
+++ b/cpp/src/arrow/array/util.cc
@@ -199,7 +199,10 @@ class NullArrayFactory {
return out_;
}
- Status Visit(const NullType&) { return Status::OK(); }
+ Status Visit(const NullType&) {
+ out_->buffers.resize(1, nullptr);
+ return Status::OK();
+ }
Status Visit(const FixedWidthType&) {
out_->buffers.resize(2, buffer_);
@@ -233,10 +236,28 @@ class NullArrayFactory {
}
Status Visit(const UnionType& type) {
- auto n_buffers = type.mode() == UnionMode::SPARSE ? 2 : 3;
- out_->buffers.resize(n_buffers, buffer_);
+ out_->buffers.resize(2);
+
+ // First buffer is always null
+ out_->buffers[0] = nullptr;
+
+ // Type codes are all zero, so we can use buffer_ which has had its memory
+ // zeroed
+ out_->buffers[1] = buffer_;
+
+ // For sparse unions, we now create children with the same length as the
+ // parent
+ int64_t child_length = length_;
+ if (type.mode() == UnionMode::DENSE) {
+ // For dense unions, we set the offsets to all zero and create children
+ // with length 1
+ out_->buffers.resize(3);
+ out_->buffers[2] = buffer_;
+
+ child_length = 1;
+ }
for (int i = 0; i < type_->num_fields(); ++i) {
- ARROW_ASSIGN_OR_RAISE(out_->child_data[i], CreateChild(i, length_));
+ ARROW_ASSIGN_OR_RAISE(out_->child_data[i], CreateChild(i, child_length));
}
return Status::OK();
}
diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc
index 1ca69bf..cbc424a 100644
--- a/cpp/src/arrow/ipc/json_simple.cc
+++ b/cpp/src/arrow/ipc/json_simple.cc
@@ -65,7 +65,7 @@ class Converter {
virtual Status AppendValue(const rj::Value& json_obj) = 0;
- virtual Status AppendNull() = 0;
+ Status AppendNull() { return this->builder()->AppendNull(); }
virtual Status AppendValues(const rj::Value& json_array) = 0;
@@ -113,8 +113,6 @@ class NullConverter final : public
ConcreteConverter<NullConverter> {
builder_ = std::make_shared<NullBuilder>();
}
- Status AppendNull() override { return builder_->AppendNull(); }
-
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
return AppendNull();
@@ -138,8 +136,6 @@ class BooleanConverter final : public
ConcreteConverter<BooleanConverter> {
builder_ = std::make_shared<BooleanBuilder>();
}
- Status AppendNull() override { return builder_->AppendNull(); }
-
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
return AppendNull();
@@ -232,11 +228,9 @@ class IntegerConverter final : public
ConcreteConverter<IntegerConverter<Type>>
return Status::OK();
}
- Status AppendNull() override { return builder_->AppendNull(); }
-
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
- return AppendNull();
+ return this->AppendNull();
}
c_type value;
RETURN_NOT_OK(ConvertNumber<Type>(json_obj, *this->type_, &value));
@@ -262,11 +256,9 @@ class FloatConverter final : public
ConcreteConverter<FloatConverter<Type>> {
builder_ = std::make_shared<NumericBuilder<Type>>();
}
- Status AppendNull() override { return builder_->AppendNull(); }
-
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
- return AppendNull();
+ return this->AppendNull();
}
c_type value;
RETURN_NOT_OK(ConvertNumber<Type>(json_obj, *this->type_, &value));
@@ -290,11 +282,9 @@ class DecimalConverter final : public
ConcreteConverter<DecimalConverter> {
builder_ = std::make_shared<DecimalBuilder>(type);
}
- Status AppendNull() override { return builder_->AppendNull(); }
-
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
- return AppendNull();
+ return this->AppendNull();
}
if (json_obj.IsString()) {
int32_t precision, scale;
@@ -328,11 +318,9 @@ class TimestampConverter final : public
ConcreteConverter<TimestampConverter> {
builder_ = std::make_shared<TimestampBuilder>(type, default_memory_pool());
}
- Status AppendNull() override { return builder_->AppendNull(); }
-
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
- return AppendNull();
+ return this->AppendNull();
}
int64_t value;
if (json_obj.IsNumber()) {
@@ -366,11 +354,9 @@ class DayTimeIntervalConverter final
builder_ = std::make_shared<DayTimeIntervalBuilder>(default_memory_pool());
}
- Status AppendNull() override { return builder_->AppendNull(); }
-
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
- return AppendNull();
+ return this->AppendNull();
}
DayTimeIntervalType::DayMilliseconds value;
if (!json_obj.IsArray()) {
@@ -405,11 +391,9 @@ class StringConverter final : public
ConcreteConverter<StringConverter<TYPE>> {
builder_ = std::make_shared<BuilderType>(type, default_memory_pool());
}
- Status AppendNull() override { return builder_->AppendNull(); }
-
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
- return AppendNull();
+ return this->AppendNull();
}
if (json_obj.IsString()) {
auto view = util::string_view(json_obj.GetString(),
json_obj.GetStringLength());
@@ -436,11 +420,9 @@ class FixedSizeBinaryConverter final
builder_ = std::make_shared<FixedSizeBinaryBuilder>(type,
default_memory_pool());
}
- Status AppendNull() override { return builder_->AppendNull(); }
-
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
- return AppendNull();
+ return this->AppendNull();
}
if (json_obj.IsString()) {
auto view = util::string_view(json_obj.GetString(),
json_obj.GetStringLength());
@@ -481,11 +463,9 @@ class ListConverter final : public
ConcreteConverter<ListConverter<TYPE>> {
return Status::OK();
}
- Status AppendNull() override { return builder_->AppendNull(); }
-
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
- return AppendNull();
+ return this->AppendNull();
}
RETURN_NOT_OK(builder_->Append());
// Extend the child converter with this JSON array
@@ -517,11 +497,9 @@ class MapConverter final : public
ConcreteConverter<MapConverter> {
return Status::OK();
}
- Status AppendNull() override { return builder_->AppendNull(); }
-
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
- return AppendNull();
+ return this->AppendNull();
}
RETURN_NOT_OK(builder_->Append());
if (!json_obj.IsArray()) {
@@ -570,11 +548,9 @@ class FixedSizeListConverter final : public
ConcreteConverter<FixedSizeListConve
return Status::OK();
}
- Status AppendNull() override { return builder_->AppendNull(); }
-
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
- return AppendNull();
+ return this->AppendNull();
}
RETURN_NOT_OK(builder_->Append());
// Extend the child converter with this JSON array
@@ -613,19 +589,12 @@ class StructConverter final : public
ConcreteConverter<StructConverter> {
return Status::OK();
}
- Status AppendNull() override {
- for (auto& converter : child_converters_) {
- RETURN_NOT_OK(converter->AppendNull());
- }
- return builder_->AppendNull();
- }
-
// Append a JSON value that is either an array of N elements in order
// or an object mapping struct names to values (omitted struct members
// are mapped to null).
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
- return AppendNull();
+ return this->AppendNull();
}
if (json_obj.IsArray()) {
auto size = json_obj.Size();
@@ -701,20 +670,11 @@ class UnionConverter final : public
ConcreteConverter<UnionConverter> {
return Status::OK();
}
- Status AppendNull() override {
- if (mode_ == UnionMode::SPARSE) {
- for (auto& converter : child_converters_) {
- RETURN_NOT_OK(converter->AppendNull());
- }
- }
- return builder_->AppendNull();
- }
-
// Append a JSON value that must be a 2-long array, containing the type_id
// and value of the UnionArray's slot.
Status AppendValue(const rj::Value& json_obj) override {
if (json_obj.IsNull()) {
- return AppendNull();
+ return this->AppendNull();
}
if (!json_obj.IsArray()) {
return JSONTypeError("array", json_obj.GetType());
diff --git a/cpp/src/arrow/ipc/json_simple_test.cc
b/cpp/src/arrow/ipc/json_simple_test.cc
index bf6098d..fe1b027 100644
--- a/cpp/src/arrow/ipc/json_simple_test.cc
+++ b/cpp/src/arrow/ipc/json_simple_test.cc
@@ -998,9 +998,9 @@ TEST(TestDenseUnion, Basics) {
auto array = checked_pointer_cast<DenseUnionArray>(
ArrayFromJSON(type, "[null, [4, 122], [8, true], [4, null], null, [8,
false]]"));
- auto expected_types = ArrayFromJSON(int8(), "[null, 4, 8, 4, null, 8]");
- auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 0, 1, 0, 1]");
- auto expected_a = ArrayFromJSON(int8(), "[122, null]");
+ auto expected_types = ArrayFromJSON(int8(), "[4, 4, 8, 4, 4, 8]");
+ auto expected_offsets = ArrayFromJSON(int32(), "[0, 1, 0, 2, 3, 1]");
+ auto expected_a = ArrayFromJSON(int8(), "[null, 122, null, null]");
auto expected_b = ArrayFromJSON(boolean(), "[true, false]");
ASSERT_OK_AND_ASSIGN(
@@ -1022,7 +1022,7 @@ TEST(TestSparseUnion, Basics) {
auto type = sparse_union({field_a, field_b}, {4, 8});
auto array = ArrayFromJSON(type, "[[4, 122], [8, true], [4, null], null, [8,
false]]");
- auto expected_types = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]");
+ auto expected_types = ArrayFromJSON(int8(), "[4, 8, 4, 4, 8]");
auto expected_a = ArrayFromJSON(int8(), "[122, null, null, null, null]");
auto expected_b = ArrayFromJSON(boolean(), "[null, true, null, null,
false]");
@@ -1045,9 +1045,9 @@ TEST(TestDenseUnion, ListOfUnion) {
"[[4, null], null, [8,
false]]"
"]"));
- auto expected_types = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]");
- auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 0, 1]");
- auto expected_a = ArrayFromJSON(int8(), "[122, null]");
+ auto expected_types = ArrayFromJSON(int8(), "[4, 8, 4, 4, 8]");
+ auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 2, 1]");
+ auto expected_a = ArrayFromJSON(int8(), "[122, null, null]");
auto expected_b = ArrayFromJSON(boolean(), "[true, false]");
ASSERT_OK_AND_ASSIGN(
@@ -1079,7 +1079,7 @@ TEST(TestSparseUnion, ListOfUnion) {
"[[4, null], null, [8, false]]"
"]");
- auto expected_types = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]");
+ auto expected_types = ArrayFromJSON(int8(), "[4, 8, 4, 4, 8]");
auto expected_a = ArrayFromJSON(int8(), "[122, null, null, null, null]");
auto expected_b = ArrayFromJSON(boolean(), "[null, true, null, null,
false]");
@@ -1108,11 +1108,12 @@ TEST(TestDenseUnion, UnionOfStructs) {
[23, {"tango": 8.25, "foxtrot": [0, 2, 3]}]
])"));
- auto expected_types = ArrayFromJSON(int8(), "[0, 23, 0, null, 23]");
- auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 0, 1]");
+ auto expected_types = ArrayFromJSON(int8(), "[0, 23, 0, 0, 23]");
+ auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 2, 1]");
ArrayVector expected_fields = {ArrayFromJSON(fields[0]->type(), R"([
{"alpha": 0.0, "bravo": "charlie"},
- {"bravo": "mike"}
+ {"bravo": "mike"},
+ null
])"),
ArrayFromJSON(fields[1]->type(), R"([
{"whiskey": 99},
@@ -1150,7 +1151,7 @@ TEST(TestSparseUnion, UnionOfStructs) {
[23, {"tango": 8.25, "foxtrot": [0, 2, 3]}]
])");
- auto expected_types = ArrayFromJSON(int8(), "[0, 23, 0, null, 23]");
+ auto expected_types = ArrayFromJSON(int8(), "[0, 23, 0, 0, 23]");
ArrayVector expected_fields = {
ArrayFromJSON(fields[0]->type(), R"([
{"alpha": 0.0, "bravo": "charlie"},
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index faab092..8488457 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -35,6 +35,7 @@
#include "arrow/io/memory.h"
#include "arrow/ipc/message.h"
#include "arrow/ipc/metadata_internal.h"
+#include "arrow/ipc/util.h"
#include "arrow/ipc/writer.h"
#include "arrow/record_batch.h"
#include "arrow/sparse_tensor.h"
@@ -178,27 +179,29 @@ class ArrayLoader {
return Status::OK();
}
- Status LoadCommon() {
+ Status LoadCommon(Type::type type_id) {
// This only contains the length and null count, which we need to figure
// out what to do with the buffers. For example, if null_count == 0, then
// we can skip that buffer without reading from shared memory
RETURN_NOT_OK(GetFieldMetadata(field_index_++, out_));
- // extract null_bitmap which is common to all arrays
- if (out_->null_count == 0) {
- out_->buffers[0] = nullptr;
- } else {
- RETURN_NOT_OK(GetBuffer(buffer_index_, &out_->buffers[0]));
+ if (::arrow::internal::HasValidityBitmap(type_id)) {
+ // Extract null_bitmap which is common to all arrays except for unions.
+ if (out_->null_count == 0) {
+ out_->buffers[0] = nullptr;
+ } else {
+ RETURN_NOT_OK(GetBuffer(buffer_index_, &out_->buffers[0]));
+ }
+ buffer_index_++;
}
- buffer_index_++;
return Status::OK();
}
template <typename TYPE>
- Status LoadPrimitive() {
+ Status LoadPrimitive(Type::type type_id) {
out_->buffers.resize(2);
- RETURN_NOT_OK(LoadCommon());
+ RETURN_NOT_OK(LoadCommon(type_id));
if (out_->length > 0) {
RETURN_NOT_OK(GetBuffer(buffer_index_++, &out_->buffers[1]));
} else {
@@ -209,10 +212,10 @@ class ArrayLoader {
}
template <typename TYPE>
- Status LoadBinary() {
+ Status LoadBinary(Type::type type_id) {
out_->buffers.resize(3);
- RETURN_NOT_OK(LoadCommon());
+ RETURN_NOT_OK(LoadCommon(type_id));
RETURN_NOT_OK(GetBuffer(buffer_index_++, &out_->buffers[1]));
return GetBuffer(buffer_index_++, &out_->buffers[2]);
}
@@ -221,7 +224,7 @@ class ArrayLoader {
Status LoadList(const TYPE& type) {
out_->buffers.resize(2);
- RETURN_NOT_OK(LoadCommon());
+ RETURN_NOT_OK(LoadCommon(type.id()));
RETURN_NOT_OK(GetBuffer(buffer_index_++, &out_->buffers[1]));
const int num_children = type.num_fields();
@@ -259,17 +262,17 @@ class ArrayLoader {
!std::is_base_of<DictionaryType, T>::value,
Status>
Visit(const T& type) {
- return LoadPrimitive<T>();
+ return LoadPrimitive<T>(type.id());
}
template <typename T>
enable_if_base_binary<T, Status> Visit(const T& type) {
- return LoadBinary<T>();
+ return LoadBinary<T>(type.id());
}
Status Visit(const FixedSizeBinaryType& type) {
out_->buffers.resize(2);
- RETURN_NOT_OK(LoadCommon());
+ RETURN_NOT_OK(LoadCommon(type.id()));
return GetBuffer(buffer_index_++, &out_->buffers[1]);
}
@@ -286,7 +289,7 @@ class ArrayLoader {
Status Visit(const FixedSizeListType& type) {
out_->buffers.resize(1);
- RETURN_NOT_OK(LoadCommon());
+ RETURN_NOT_OK(LoadCommon(type.id()));
const int num_children = type.num_fields();
if (num_children != 1) {
@@ -298,7 +301,7 @@ class ArrayLoader {
Status Visit(const StructType& type) {
out_->buffers.resize(1);
- RETURN_NOT_OK(LoadCommon());
+ RETURN_NOT_OK(LoadCommon(type.id()));
return LoadChildren(type.fields());
}
@@ -306,7 +309,12 @@ class ArrayLoader {
int n_buffers = type.mode() == UnionMode::SPARSE ? 2 : 3;
out_->buffers.resize(n_buffers);
- RETURN_NOT_OK(LoadCommon());
+ RETURN_NOT_OK(LoadCommon(type.id()));
+
+ // Validity bitmap placeholder like for NullType, which is never sent or
+ // received in IPC.
+ out_->buffers[0] = nullptr;
+
if (out_->length > 0) {
RETURN_NOT_OK(GetBuffer(buffer_index_, &out_->buffers[1]));
if (type.mode() == UnionMode::DENSE) {
diff --git a/cpp/src/arrow/ipc/test_common.cc b/cpp/src/arrow/ipc/test_common.cc
index 9738e87..4400fee 100644
--- a/cpp/src/arrow/ipc/test_common.cc
+++ b/cpp/src/arrow/ipc/test_common.cc
@@ -443,11 +443,10 @@ Status MakeUnion(std::shared_ptr<RecordBatch>* out) {
auto sparse_type = sparse_union(union_fields, type_codes);
auto dense_type = dense_union(union_fields, type_codes);
- auto f0 = field("sparse_nonnull", sparse_type, false);
- auto f1 = field("sparse", sparse_type);
- auto f2 = field("dense", dense_type);
+ auto f0 = field("sparse", sparse_type);
+ auto f1 = field("dense", dense_type);
- auto schema = ::arrow::schema({f0, f1, f2});
+ auto schema = ::arrow::schema({f0, f1});
// Create data
std::vector<std::shared_ptr<Array>> sparse_children(2);
@@ -476,22 +475,13 @@ Status MakeUnion(std::shared_ptr<RecordBatch>* out) {
std::vector<int32_t> offsets = {0, 0, 1, 2, 1, 2, 3};
RETURN_NOT_OK(CopyBufferFromVector(offsets, default_memory_pool(),
&offsets_buffer));
- std::vector<uint8_t> null_bytes(length, 1);
- null_bytes[2] = 0;
- ARROW_ASSIGN_OR_RAISE(auto null_bitmap, internal::BytesToBits(null_bytes));
-
- // construct individual nullable/non-nullable struct arrays
- auto sparse_no_nulls = std::make_shared<SparseUnionArray>(
- sparse_type, length, sparse_children, type_ids_buffer);
auto sparse = std::make_shared<SparseUnionArray>(sparse_type, length,
sparse_children,
- type_ids_buffer,
null_bitmap, 1);
-
- auto dense =
- std::make_shared<DenseUnionArray>(dense_type, length, dense_children,
- type_ids_buffer, offsets_buffer,
null_bitmap, 1);
+ type_ids_buffer);
+ auto dense = std::make_shared<DenseUnionArray>(dense_type, length,
dense_children,
+ type_ids_buffer,
offsets_buffer);
// construct batch
- std::vector<std::shared_ptr<Array>> arrays = {sparse_no_nulls, sparse,
dense};
+ std::vector<std::shared_ptr<Array>> arrays = {sparse, dense};
*out = RecordBatch::Make(schema, length, arrays);
return Status::OK();
}
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 4db6136..16e97c6 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -137,8 +137,8 @@ class RecordBatchSerializer {
// push back all common elements
field_nodes_.push_back({arr.length(), arr.null_count(), 0});
- // Null type has no validity bitmap
- if (arr.type_id() != Type::NA) {
+ // Null and union types have no validity bitmap
+ if (::arrow::internal::HasValidityBitmap(arr.type_id())) {
if (arr.null_count() > 0) {
std::shared_ptr<Buffer> bitmap;
RETURN_NOT_OK(GetTruncatedBitmap(arr.offset(), arr.length(),
arr.null_bitmap(),
diff --git a/cpp/src/arrow/python/deserialize.cc
b/cpp/src/arrow/python/deserialize.cc
index c7d99d2..961a168 100644
--- a/cpp/src/arrow/python/deserialize.cc
+++ b/cpp/src/arrow/python/deserialize.cc
@@ -120,6 +120,10 @@ Status DeserializeArray(int32_t index, PyObject* base,
const SerializedPyObject&
Status GetValue(PyObject* context, const Array& arr, int64_t index, int8_t
type,
PyObject* base, const SerializedPyObject& blobs, PyObject**
result) {
switch (type) {
+ case PythonType::NONE:
+ Py_INCREF(Py_None);
+ *result = Py_None;
+ return Status::OK();
case PythonType::BOOL:
*result = PyBool_FromLong(checked_cast<const
BooleanArray&>(arr).Value(index));
return Status::OK();
@@ -257,17 +261,12 @@ Status DeserializeSequence(PyObject* context, const
Array& array, int64_t start_
std::vector<int8_t> python_types;
RETURN_NOT_OK(GetPythonTypes(data, &python_types));
for (int64_t i = start_idx; i < stop_idx; ++i) {
- if (data.IsNull(i)) {
- Py_INCREF(Py_None);
- RETURN_NOT_OK(set_item(result.obj(), i - start_idx, Py_None));
- } else {
- const int64_t offset = value_offsets[i];
- const uint8_t type = type_codes[i];
- PyObject* value;
- RETURN_NOT_OK(GetValue(context, *data.field(type), offset,
python_types[type], base,
- blobs, &value));
- RETURN_NOT_OK(set_item(result.obj(), i - start_idx, value));
- }
+ const int64_t offset = value_offsets[i];
+ const uint8_t type = type_codes[i];
+ PyObject* value;
+ RETURN_NOT_OK(GetValue(context, *data.field(type), offset,
python_types[type], base,
+ blobs, &value));
+ RETURN_NOT_OK(set_item(result.obj(), i - start_idx, value));
}
*out = result.detach();
return Status::OK();
diff --git a/cpp/src/arrow/python/python_to_arrow.cc
b/cpp/src/arrow/python/python_to_arrow.cc
index 597ee67..66a1e41 100644
--- a/cpp/src/arrow/python/python_to_arrow.cc
+++ b/cpp/src/arrow/python/python_to_arrow.cc
@@ -995,15 +995,7 @@ class StructConverter : public TypedConverter<StructType,
null_coding> {
}
// Append a missing item
- Status AppendNull() override {
- RETURN_NOT_OK(this->typed_builder_->AppendNull());
- // Need to also insert a missing item on all child builders
- // (compare with ListConverter)
- for (int i = 0; i < num_fields_; i++) {
- RETURN_NOT_OK(this->value_converters_[i]->Append(Py_None));
- }
- return Status::OK();
- }
+ Status AppendNull() override { return this->typed_builder_->AppendNull(); }
protected:
Status AppendDictItem(PyObject* obj) {
diff --git a/cpp/src/arrow/python/serialize.cc
b/cpp/src/arrow/python/serialize.cc
index a4645b7..cefa97a 100644
--- a/cpp/src/arrow/python/serialize.cc
+++ b/cpp/src/arrow/python/serialize.cc
@@ -71,7 +71,9 @@ class SequenceBuilder {
types_(::arrow::int8(), pool),
offsets_(::arrow::int32(), pool),
type_map_(PythonType::NUM_PYTHON_TYPES, -1) {
- builder_.reset(new DenseUnionBuilder(pool));
+ auto null_builder = std::make_shared<NullBuilder>(pool);
+ auto initial_ty = dense_union({field("0", null())});
+ builder_.reset(new DenseUnionBuilder(pool, {null_builder}, initial_ty));
}
// Appending a none to the sequence
diff --git a/cpp/src/arrow/python/serialize.h b/cpp/src/arrow/python/serialize.h
index 191e279..fd207d3 100644
--- a/cpp/src/arrow/python/serialize.h
+++ b/cpp/src/arrow/python/serialize.h
@@ -115,6 +115,7 @@ Status WriteNdarrayHeader(std::shared_ptr<DataType> dtype,
struct PythonType {
enum type {
+ NONE,
BOOL,
INT,
PY2INT, // Kept for compatibility
diff --git a/cpp/src/arrow/testing/json_internal.cc
b/cpp/src/arrow/testing/json_internal.cc
index 023c05d..1ddaf3a 100644
--- a/cpp/src/arrow/testing/json_internal.cc
+++ b/cpp/src/arrow/testing/json_internal.cc
@@ -664,9 +664,7 @@ class ArrayWriter {
}
Status Visit(const UnionArray& array) {
- WriteValidityField(array);
const auto& type = checked_cast<const UnionType&>(*array.type());
-
WriteIntegerField("TYPE_ID", array.raw_type_codes(), array.length());
if (type.mode() == UnionMode::DENSE) {
auto offsets = checked_cast<const
DenseUnionArray&>(array).raw_value_offsets();
@@ -1473,13 +1471,8 @@ class ArrayReader {
}
Status Visit(const UnionType& type) {
- int32_t null_count = 0;
-
- std::shared_ptr<Buffer> validity_buffer;
std::shared_ptr<Buffer> type_id_buffer;
- RETURN_NOT_OK(GetValidityBuffer(is_valid_, &null_count, &validity_buffer));
-
const auto& json_type_ids = obj_.FindMember("TYPE_ID");
RETURN_NOT_ARRAY("TYPE_ID", json_type_ids, obj_);
RETURN_NOT_OK(
@@ -1489,8 +1482,8 @@ class ArrayReader {
RETURN_NOT_OK(GetChildren(obj_, type, &children));
if (type.mode() == UnionMode::SPARSE) {
- result_ = std::make_shared<SparseUnionArray>(
- type_, length_, children, type_id_buffer, validity_buffer,
null_count);
+ result_ =
+ std::make_shared<SparseUnionArray>(type_, length_, children,
type_id_buffer);
} else {
const auto& json_offsets = obj_.FindMember("OFFSET");
RETURN_NOT_ARRAY("OFFSET", json_offsets, obj_);
@@ -1499,9 +1492,8 @@ class ArrayReader {
RETURN_NOT_OK(
GetIntArray<int32_t>(json_offsets->value.GetArray(), length_,
&offsets_buffer));
- result_ =
- std::make_shared<DenseUnionArray>(type_, length_, children,
type_id_buffer,
- offsets_buffer, validity_buffer,
null_count);
+ result_ = std::make_shared<DenseUnionArray>(type_, length_, children,
+ type_id_buffer,
offsets_buffer);
}
return Status::OK();
@@ -1609,8 +1601,8 @@ class ArrayReader {
Status Parse(std::shared_ptr<Array>* out) {
RETURN_NOT_OK(GetObjectInt(obj_, "count", &length_));
- if (type_->id() != Type::NA) {
- // Null type is the only type without any buffers
+ if (::arrow::internal::HasValidityBitmap(type_->id())) {
+ // Null and union types don't have a validity bitmap
RETURN_NOT_OK(ParseValidityBitmap());
}
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 1ebf7f5..72bdeec 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -593,9 +593,9 @@ Status UnionType::ValidateParameters(const
std::vector<std::shared_ptr<Field>>&
DataTypeLayout UnionType::layout() const {
if (mode() == UnionMode::SPARSE) {
return DataTypeLayout(
- {DataTypeLayout::Bitmap(),
DataTypeLayout::FixedWidth(sizeof(uint8_t))});
+ {DataTypeLayout::AlwaysNull(),
DataTypeLayout::FixedWidth(sizeof(uint8_t))});
} else {
- return DataTypeLayout({DataTypeLayout::Bitmap(),
+ return DataTypeLayout({DataTypeLayout::AlwaysNull(),
DataTypeLayout::FixedWidth(sizeof(uint8_t)),
DataTypeLayout::FixedWidth(sizeof(int32_t))});
}
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index c343a98..7cb39d4 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -1921,6 +1921,17 @@ Result<std::shared_ptr<Schema>> UnifySchemas(
namespace internal {
+static inline bool HasValidityBitmap(Type::type id) {
+ switch (id) {
+ case Type::NA:
+ case Type::DENSE_UNION:
+ case Type::SPARSE_UNION:
+ return false;
+ default:
+ return true;
+ }
+}
+
ARROW_EXPORT
std::string ToString(Type::type id);
diff --git a/dev/archery/archery/integration/datagen.py
b/dev/archery/archery/integration/datagen.py
index 6a23d0b..f4522b8 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -911,50 +911,40 @@ class _BaseUnionField(Field):
def _get_children(self):
return [field.get_json() for field in self.fields]
- def _make_type_ids(self, is_valid):
- type_ids = np.random.choice(self.type_ids, len(is_valid))
- # Mark 0 for null entries (mimics C++ UnionBuilder behaviour)
- return np.choose(is_valid, [0, type_ids])
+ def _make_type_ids(self, size):
+ return np.random.choice(self.type_ids, size)
class SparseUnionField(_BaseUnionField):
mode = 'SPARSE'
def generate_column(self, size, name=None):
- is_valid = self._make_is_valid(size)
-
- array_type_ids = self._make_type_ids(is_valid)
+ array_type_ids = self._make_type_ids(size)
field_values = [field.generate_column(size) for field in self.fields]
if name is None:
name = self.name
- return SparseUnionColumn(name, size, is_valid, array_type_ids,
- field_values)
+ return SparseUnionColumn(name, size, array_type_ids, field_values)
class DenseUnionField(_BaseUnionField):
mode = 'DENSE'
def generate_column(self, size, name=None):
- is_valid = self._make_is_valid(size)
-
# Reverse mapping {logical type id => physical child id}
child_ids = [None] * (max(self.type_ids) + 1)
for i, type_id in enumerate(self.type_ids):
child_ids[type_id] = i
- array_type_ids = self._make_type_ids(is_valid)
+ array_type_ids = self._make_type_ids(size)
offsets = []
child_sizes = [0] * len(self.fields)
for i in range(size):
- if is_valid[i]:
- child_id = child_ids[array_type_ids[i]]
- offset = child_sizes[child_id]
- offsets.append(offset)
- child_sizes[child_id] = offset + 1
- else:
- offsets.append(0)
+ child_id = child_ids[array_type_ids[i]]
+ offset = child_sizes[child_id]
+ offsets.append(offset)
+ child_sizes[child_id] = offset + 1
field_values = [
field.generate_column(child_size)
@@ -962,8 +952,8 @@ class DenseUnionField(_BaseUnionField):
if name is None:
name = self.name
- return DenseUnionColumn(name, size, is_valid, array_type_ids,
- offsets, field_values)
+ return DenseUnionColumn(name, size, array_type_ids, offsets,
+ field_values)
class Dictionary(object):
@@ -1065,16 +1055,14 @@ class StructColumn(Column):
class SparseUnionColumn(Column):
- def __init__(self, name, count, is_valid, type_ids, field_values):
+ def __init__(self, name, count, type_ids, field_values):
super().__init__(name, count)
- self.is_valid = is_valid
self.type_ids = type_ids
self.field_values = field_values
def _get_buffers(self):
return [
- ('VALIDITY', [int(v) for v in self.is_valid]),
- ('TYPE_ID', [int(v) for v in self.type_ids]),
+ ('TYPE_ID', [int(v) for v in self.type_ids])
]
def _get_children(self):
@@ -1083,16 +1071,14 @@ class SparseUnionColumn(Column):
class DenseUnionColumn(Column):
- def __init__(self, name, count, is_valid, type_ids, offsets, field_values):
+ def __init__(self, name, count, type_ids, offsets, field_values):
super().__init__(name, count)
- self.is_valid = is_valid
self.type_ids = type_ids
self.offsets = offsets
self.field_values = field_values
def _get_buffers(self):
return [
- ('VALIDITY', [int(v) for v in self.is_valid]),
('TYPE_ID', [int(v) for v in self.type_ids]),
('OFFSET', [int(v) for v in self.offsets]),
]
diff --git a/ruby/red-arrow/lib/arrow/struct-array-builder.rb
b/ruby/red-arrow/lib/arrow/struct-array-builder.rb
index d03dbc6..85d27a4 100644
--- a/ruby/red-arrow/lib/arrow/struct-array-builder.rb
+++ b/ruby/red-arrow/lib/arrow/struct-array-builder.rb
@@ -108,9 +108,6 @@ module Arrow
alias_method :append_null_raw, :append_null
def append_null
append_null_raw
- cached_field_builders.each do |builder|
- builder.append_null
- end
end
# @since 0.12.0
diff --git a/ruby/red-arrow/test/raw-records/test-dense-union-array.rb
b/ruby/red-arrow/test/raw-records/test-dense-union-array.rb
index 8939c0c..8182804 100644
--- a/ruby/red-arrow/test/raw-records/test-dense-union-array.rb
+++ b/ruby/red-arrow/test/raw-records/test-dense-union-array.rb
@@ -56,10 +56,7 @@ module RawRecordsDenseUnionArrayTests
end
records.each do |record|
column = record[0]
- if column.nil?
- type_ids << nil
- offsets << 0
- elsif column.key?("0")
+ if column.key?("0")
type_id = type_codes[0]
type_ids << type_id
offsets << (type_ids.count(type_id) - 1)
@@ -82,7 +79,6 @@ module RawRecordsDenseUnionArrayTests
def test_null
records = [
[{"0" => nil}],
- [nil],
]
target = build(:null, records)
assert_equal(records, target.raw_records)
@@ -91,7 +87,6 @@ module RawRecordsDenseUnionArrayTests
def test_boolean
records = [
[{"0" => true}],
- [nil],
[{"1" => nil}],
]
target = build(:boolean, records)
@@ -101,7 +96,6 @@ module RawRecordsDenseUnionArrayTests
def test_int8
records = [
[{"0" => -(2 ** 7)}],
- [nil],
[{"1" => nil}],
]
target = build(:int8, records)
@@ -111,7 +105,6 @@ module RawRecordsDenseUnionArrayTests
def test_uint8
records = [
[{"0" => (2 ** 8) - 1}],
- [nil],
[{"1" => nil}],
]
target = build(:uint8, records)
@@ -121,7 +114,6 @@ module RawRecordsDenseUnionArrayTests
def test_int16
records = [
[{"0" => -(2 ** 15)}],
- [nil],
[{"1" => nil}],
]
target = build(:int16, records)
@@ -131,7 +123,6 @@ module RawRecordsDenseUnionArrayTests
def test_uint16
records = [
[{"0" => (2 ** 16) - 1}],
- [nil],
[{"1" => nil}],
]
target = build(:uint16, records)
@@ -141,7 +132,6 @@ module RawRecordsDenseUnionArrayTests
def test_int32
records = [
[{"0" => -(2 ** 31)}],
- [nil],
[{"1" => nil}],
]
target = build(:int32, records)
@@ -151,7 +141,6 @@ module RawRecordsDenseUnionArrayTests
def test_uint32
records = [
[{"0" => (2 ** 32) - 1}],
- [nil],
[{"1" => nil}],
]
target = build(:uint32, records)
@@ -161,7 +150,6 @@ module RawRecordsDenseUnionArrayTests
def test_int64
records = [
[{"0" => -(2 ** 63)}],
- [nil],
[{"1" => nil}],
]
target = build(:int64, records)
@@ -171,7 +159,6 @@ module RawRecordsDenseUnionArrayTests
def test_uint64
records = [
[{"0" => (2 ** 64) - 1}],
- [nil],
[{"1" => nil}],
]
target = build(:uint64, records)
@@ -181,7 +168,6 @@ module RawRecordsDenseUnionArrayTests
def test_float
records = [
[{"0" => -1.0}],
- [nil],
[{"1" => nil}],
]
target = build(:float, records)
@@ -191,7 +177,6 @@ module RawRecordsDenseUnionArrayTests
def test_double
records = [
[{"0" => -1.0}],
- [nil],
[{"1" => nil}],
]
target = build(:double, records)
@@ -201,7 +186,6 @@ module RawRecordsDenseUnionArrayTests
def test_binary
records = [
[{"0" => "\xff".b}],
- [nil],
[{"1" => nil}],
]
target = build(:binary, records)
@@ -211,7 +195,6 @@ module RawRecordsDenseUnionArrayTests
def test_string
records = [
[{"0" => "Ruby"}],
- [nil],
[{"1" => nil}],
]
target = build(:string, records)
@@ -221,7 +204,6 @@ module RawRecordsDenseUnionArrayTests
def test_date32
records = [
[{"0" => Date.new(1960, 1, 1)}],
- [nil],
[{"1" => nil}],
]
target = build(:date32, records)
@@ -231,7 +213,6 @@ module RawRecordsDenseUnionArrayTests
def test_date64
records = [
[{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
- [nil],
[{"1" => nil}],
]
target = build(:date64, records)
@@ -241,7 +222,6 @@ module RawRecordsDenseUnionArrayTests
def test_timestamp_second
records = [
[{"0" => Time.parse("1960-01-01T02:09:30Z")}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -255,7 +235,6 @@ module RawRecordsDenseUnionArrayTests
def test_timestamp_milli
records = [
[{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -269,7 +248,6 @@ module RawRecordsDenseUnionArrayTests
def test_timestamp_micro
records = [
[{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -283,7 +261,6 @@ module RawRecordsDenseUnionArrayTests
def test_timestamp_nano
records = [
[{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -299,7 +276,6 @@ module RawRecordsDenseUnionArrayTests
records = [
# 00:10:00
[{"0" => Arrow::Time.new(unit, 60 * 10)}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -315,7 +291,6 @@ module RawRecordsDenseUnionArrayTests
records = [
# 00:10:00.123
[{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -331,7 +306,6 @@ module RawRecordsDenseUnionArrayTests
records = [
# 00:10:00.123456
[{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -347,7 +321,6 @@ module RawRecordsDenseUnionArrayTests
records = [
# 00:10:00.123456789
[{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 +
123_456_789)}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -361,7 +334,6 @@ module RawRecordsDenseUnionArrayTests
def test_decimal128
records = [
[{"0" => BigDecimal("92.92")}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -376,7 +348,6 @@ module RawRecordsDenseUnionArrayTests
def test_list
records = [
[{"0" => [true, nil, false]}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -393,7 +364,6 @@ module RawRecordsDenseUnionArrayTests
def test_struct
records = [
[{"0" => {"sub_field" => true}}],
- [nil],
[{"1" => nil}],
[{"0" => {"sub_field" => nil}}],
]
@@ -414,7 +384,6 @@ module RawRecordsDenseUnionArrayTests
omit("Need to add support for SparseUnionArrayBuilder")
records = [
[{"0" => {"field1" => true}}],
- [nil],
[{"1" => nil}],
[{"0" => {"field2" => nil}}],
]
@@ -440,7 +409,6 @@ module RawRecordsDenseUnionArrayTests
omit("Need to add support for DenseUnionArrayBuilder")
records = [
[{"0" => {"field1" => true}}],
- [nil],
[{"1" => nil}],
[{"0" => {"field2" => nil}}],
]
@@ -466,7 +434,6 @@ module RawRecordsDenseUnionArrayTests
omit("Need to add support for DictionaryArrayBuilder")
records = [
[{"0" => "Ruby"}],
- [nil],
[{"1" => nil}],
[{"0" => "GLib"}],
]
diff --git a/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb
b/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb
index d734c3d..394ab89 100644
--- a/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb
+++ b/ruby/red-arrow/test/raw-records/test-sparse-union-array.rb
@@ -51,9 +51,7 @@ module RawRecordsSparseUnionArrayTests
end
records.each do |record|
column = record[0]
- if column.nil?
- type_ids << nil
- elsif column.key?("0")
+ if column.key?("0")
type_ids << type_codes[0]
elsif column.key?("1")
type_ids << type_codes[1]
@@ -71,7 +69,6 @@ module RawRecordsSparseUnionArrayTests
def test_null
records = [
[{"0" => nil}],
- [nil],
]
target = build(:null, records)
assert_equal(records, target.raw_records)
@@ -80,7 +77,6 @@ module RawRecordsSparseUnionArrayTests
def test_boolean
records = [
[{"0" => true}],
- [nil],
[{"1" => nil}],
]
target = build(:boolean, records)
@@ -90,7 +86,6 @@ module RawRecordsSparseUnionArrayTests
def test_int8
records = [
[{"0" => -(2 ** 7)}],
- [nil],
[{"1" => nil}],
]
target = build(:int8, records)
@@ -100,7 +95,6 @@ module RawRecordsSparseUnionArrayTests
def test_uint8
records = [
[{"0" => (2 ** 8) - 1}],
- [nil],
[{"1" => nil}],
]
target = build(:uint8, records)
@@ -110,7 +104,6 @@ module RawRecordsSparseUnionArrayTests
def test_int16
records = [
[{"0" => -(2 ** 15)}],
- [nil],
[{"1" => nil}],
]
target = build(:int16, records)
@@ -120,7 +113,6 @@ module RawRecordsSparseUnionArrayTests
def test_uint16
records = [
[{"0" => (2 ** 16) - 1}],
- [nil],
[{"1" => nil}],
]
target = build(:uint16, records)
@@ -130,7 +122,6 @@ module RawRecordsSparseUnionArrayTests
def test_int32
records = [
[{"0" => -(2 ** 31)}],
- [nil],
[{"1" => nil}],
]
target = build(:int32, records)
@@ -140,7 +131,6 @@ module RawRecordsSparseUnionArrayTests
def test_uint32
records = [
[{"0" => (2 ** 32) - 1}],
- [nil],
[{"1" => nil}],
]
target = build(:uint32, records)
@@ -150,7 +140,6 @@ module RawRecordsSparseUnionArrayTests
def test_int64
records = [
[{"0" => -(2 ** 63)}],
- [nil],
[{"1" => nil}],
]
target = build(:int64, records)
@@ -160,7 +149,6 @@ module RawRecordsSparseUnionArrayTests
def test_uint64
records = [
[{"0" => (2 ** 64) - 1}],
- [nil],
[{"1" => nil}],
]
target = build(:uint64, records)
@@ -170,7 +158,6 @@ module RawRecordsSparseUnionArrayTests
def test_float
records = [
[{"0" => -1.0}],
- [nil],
[{"1" => nil}],
]
target = build(:float, records)
@@ -180,7 +167,6 @@ module RawRecordsSparseUnionArrayTests
def test_double
records = [
[{"0" => -1.0}],
- [nil],
[{"1" => nil}],
]
target = build(:double, records)
@@ -190,7 +176,6 @@ module RawRecordsSparseUnionArrayTests
def test_binary
records = [
[{"0" => "\xff".b}],
- [nil],
[{"1" => nil}],
]
target = build(:binary, records)
@@ -200,7 +185,6 @@ module RawRecordsSparseUnionArrayTests
def test_string
records = [
[{"0" => "Ruby"}],
- [nil],
[{"1" => nil}],
]
target = build(:string, records)
@@ -210,7 +194,6 @@ module RawRecordsSparseUnionArrayTests
def test_date32
records = [
[{"0" => Date.new(1960, 1, 1)}],
- [nil],
[{"1" => nil}],
]
target = build(:date32, records)
@@ -220,7 +203,6 @@ module RawRecordsSparseUnionArrayTests
def test_date64
records = [
[{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)}],
- [nil],
[{"1" => nil}],
]
target = build(:date64, records)
@@ -230,7 +212,6 @@ module RawRecordsSparseUnionArrayTests
def test_timestamp_second
records = [
[{"0" => Time.parse("1960-01-01T02:09:30Z")}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -244,7 +225,6 @@ module RawRecordsSparseUnionArrayTests
def test_timestamp_milli
records = [
[{"0" => Time.parse("1960-01-01T02:09:30.123Z")}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -258,7 +238,6 @@ module RawRecordsSparseUnionArrayTests
def test_timestamp_micro
records = [
[{"0" => Time.parse("1960-01-01T02:09:30.123456Z")}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -272,7 +251,6 @@ module RawRecordsSparseUnionArrayTests
def test_timestamp_nano
records = [
[{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -288,7 +266,6 @@ module RawRecordsSparseUnionArrayTests
records = [
# 00:10:00
[{"0" => Arrow::Time.new(unit, 60 * 10)}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -304,7 +281,6 @@ module RawRecordsSparseUnionArrayTests
records = [
# 00:10:00.123
[{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -320,7 +296,6 @@ module RawRecordsSparseUnionArrayTests
records = [
# 00:10:00.123456
[{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -336,7 +311,6 @@ module RawRecordsSparseUnionArrayTests
records = [
# 00:10:00.123456789
[{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 +
123_456_789)}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -350,7 +324,6 @@ module RawRecordsSparseUnionArrayTests
def test_decimal128
records = [
[{"0" => BigDecimal("92.92")}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -365,7 +338,6 @@ module RawRecordsSparseUnionArrayTests
def test_list
records = [
[{"0" => [true, nil, false]}],
- [nil],
[{"1" => nil}],
]
target = build({
@@ -382,7 +354,6 @@ module RawRecordsSparseUnionArrayTests
def test_struct
records = [
[{"0" => {"sub_field" => true}}],
- [nil],
[{"1" => nil}],
[{"0" => {"sub_field" => nil}}],
]
@@ -403,7 +374,6 @@ module RawRecordsSparseUnionArrayTests
omit("Need to add support for SparseUnionArrayBuilder")
records = [
[{"0" => {"field1" => true}}],
- [nil],
[{"1" => nil}],
[{"0" => {"field2" => nil}}],
]
@@ -429,7 +399,6 @@ module RawRecordsSparseUnionArrayTests
omit("Need to add support for DenseUnionArrayBuilder")
records = [
[{"0" => {"field1" => true}}],
- [nil],
[{"1" => nil}],
[{"0" => {"field2" => nil}}],
]
@@ -455,7 +424,6 @@ module RawRecordsSparseUnionArrayTests
omit("Need to add support for DictionaryArrayBuilder")
records = [
[{"0" => "Ruby"}],
- [nil],
[{"1" => nil}],
[{"0" => "GLib"}],
]
diff --git a/ruby/red-arrow/test/values/test-dense-union-array.rb
b/ruby/red-arrow/test/values/test-dense-union-array.rb
index 2457824..c96dc71 100644
--- a/ruby/red-arrow/test/values/test-dense-union-array.rb
+++ b/ruby/red-arrow/test/values/test-dense-union-array.rb
@@ -48,10 +48,7 @@ module ValuesDenseUnionArrayTests
sub_record_batch.columns[0].data
end
values.each do |value|
- if value.nil?
- type_ids << nil
- offsets << 0
- elsif value.key?("0")
+ if value.key?("0")
type_id = type_codes[0]
type_ids << type_id
offsets << (type_ids.count(type_id) - 1)
@@ -70,7 +67,6 @@ module ValuesDenseUnionArrayTests
def test_null
values = [
{"0" => nil},
- nil,
]
target = build(:null, values)
assert_equal(values, target.values)
@@ -79,7 +75,6 @@ module ValuesDenseUnionArrayTests
def test_boolean
values = [
{"0" => true},
- nil,
{"1" => nil},
]
target = build(:boolean, values)
@@ -89,7 +84,6 @@ module ValuesDenseUnionArrayTests
def test_int8
values = [
{"0" => -(2 ** 7)},
- nil,
{"1" => nil},
]
target = build(:int8, values)
@@ -99,7 +93,6 @@ module ValuesDenseUnionArrayTests
def test_uint8
values = [
{"0" => (2 ** 8) - 1},
- nil,
{"1" => nil},
]
target = build(:uint8, values)
@@ -109,7 +102,6 @@ module ValuesDenseUnionArrayTests
def test_int16
values = [
{"0" => -(2 ** 15)},
- nil,
{"1" => nil},
]
target = build(:int16, values)
@@ -119,7 +111,6 @@ module ValuesDenseUnionArrayTests
def test_uint16
values = [
{"0" => (2 ** 16) - 1},
- nil,
{"1" => nil},
]
target = build(:uint16, values)
@@ -129,7 +120,6 @@ module ValuesDenseUnionArrayTests
def test_int32
values = [
{"0" => -(2 ** 31)},
- nil,
{"1" => nil},
]
target = build(:int32, values)
@@ -139,7 +129,6 @@ module ValuesDenseUnionArrayTests
def test_uint32
values = [
{"0" => (2 ** 32) - 1},
- nil,
{"1" => nil},
]
target = build(:uint32, values)
@@ -149,7 +138,6 @@ module ValuesDenseUnionArrayTests
def test_int64
values = [
{"0" => -(2 ** 63)},
- nil,
{"1" => nil},
]
target = build(:int64, values)
@@ -159,7 +147,6 @@ module ValuesDenseUnionArrayTests
def test_uint64
values = [
{"0" => (2 ** 64) - 1},
- nil,
{"1" => nil},
]
target = build(:uint64, values)
@@ -169,7 +156,6 @@ module ValuesDenseUnionArrayTests
def test_float
values = [
{"0" => -1.0},
- nil,
{"1" => nil},
]
target = build(:float, values)
@@ -179,7 +165,6 @@ module ValuesDenseUnionArrayTests
def test_double
values = [
{"0" => -1.0},
- nil,
{"1" => nil},
]
target = build(:double, values)
@@ -189,7 +174,6 @@ module ValuesDenseUnionArrayTests
def test_binary
values = [
{"0" => "\xff".b},
- nil,
{"1" => nil},
]
target = build(:binary, values)
@@ -199,7 +183,6 @@ module ValuesDenseUnionArrayTests
def test_string
values = [
{"0" => "Ruby"},
- nil,
{"1" => nil},
]
target = build(:string, values)
@@ -209,7 +192,6 @@ module ValuesDenseUnionArrayTests
def test_date32
values = [
{"0" => Date.new(1960, 1, 1)},
- nil,
{"1" => nil},
]
target = build(:date32, values)
@@ -219,7 +201,6 @@ module ValuesDenseUnionArrayTests
def test_date64
values = [
{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)},
- nil,
{"1" => nil},
]
target = build(:date64, values)
@@ -229,7 +210,6 @@ module ValuesDenseUnionArrayTests
def test_timestamp_second
values = [
{"0" => Time.parse("1960-01-01T02:09:30Z")},
- nil,
{"1" => nil},
]
target = build({
@@ -243,7 +223,6 @@ module ValuesDenseUnionArrayTests
def test_timestamp_milli
values = [
{"0" => Time.parse("1960-01-01T02:09:30.123Z")},
- nil,
{"1" => nil},
]
target = build({
@@ -257,7 +236,6 @@ module ValuesDenseUnionArrayTests
def test_timestamp_micro
values = [
{"0" => Time.parse("1960-01-01T02:09:30.123456Z")},
- nil,
{"1" => nil},
]
target = build({
@@ -271,7 +249,6 @@ module ValuesDenseUnionArrayTests
def test_timestamp_nano
values = [
{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")},
- nil,
{"1" => nil},
]
target = build({
@@ -287,7 +264,6 @@ module ValuesDenseUnionArrayTests
values = [
# 00:10:00
{"0" => Arrow::Time.new(unit, 60 * 10)},
- nil,
{"1" => nil},
]
target = build({
@@ -303,7 +279,6 @@ module ValuesDenseUnionArrayTests
values = [
# 00:10:00.123
{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)},
- nil,
{"1" => nil},
]
target = build({
@@ -319,7 +294,6 @@ module ValuesDenseUnionArrayTests
values = [
# 00:10:00.123456
{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)},
- nil,
{"1" => nil},
]
target = build({
@@ -335,7 +309,6 @@ module ValuesDenseUnionArrayTests
values = [
# 00:10:00.123456789
{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)},
- nil,
{"1" => nil},
]
target = build({
@@ -349,7 +322,6 @@ module ValuesDenseUnionArrayTests
def test_decimal128
values = [
{"0" => BigDecimal("92.92")},
- nil,
{"1" => nil},
]
target = build({
@@ -364,7 +336,6 @@ module ValuesDenseUnionArrayTests
def test_list
values = [
{"0" => [true, nil, false]},
- nil,
{"1" => nil},
]
target = build({
@@ -381,7 +352,6 @@ module ValuesDenseUnionArrayTests
def test_struct
values = [
{"0" => {"sub_field" => true}},
- nil,
{"1" => nil},
{"0" => {"sub_field" => nil}},
]
@@ -402,7 +372,6 @@ module ValuesDenseUnionArrayTests
omit("Need to add support for SparseUnionArrayBuilder")
values = [
{"0" => {"field1" => true}},
- nil,
{"1" => nil},
{"0" => {"field2" => nil}},
]
@@ -428,7 +397,6 @@ module ValuesDenseUnionArrayTests
omit("Need to add support for DenseUnionArrayBuilder")
values = [
{"0" => {"field1" => true}},
- nil,
{"1" => nil},
{"0" => {"field2" => nil}},
]
@@ -454,7 +422,6 @@ module ValuesDenseUnionArrayTests
omit("Need to add support for DictionaryArrayBuilder")
values = [
{"0" => "Ruby"},
- nil,
{"1" => nil},
{"0" => "GLib"},
]
diff --git a/ruby/red-arrow/test/values/test-sparse-union-array.rb
b/ruby/red-arrow/test/values/test-sparse-union-array.rb
index cb18d8a..6e1c53d 100644
--- a/ruby/red-arrow/test/values/test-sparse-union-array.rb
+++ b/ruby/red-arrow/test/values/test-sparse-union-array.rb
@@ -44,9 +44,7 @@ module ValuesSparseUnionArrayTests
sub_record_batch.columns[0].data
end
values.each do |value|
- if value.nil?
- type_ids << nil
- elsif value.key?("0")
+ if value.key?("0")
type_ids << type_codes[0]
elsif value.key?("1")
type_ids << type_codes[1]
@@ -60,7 +58,6 @@ module ValuesSparseUnionArrayTests
def test_null
values = [
{"0" => nil},
- nil,
]
target = build(:null, values)
assert_equal(values, target.values)
@@ -69,7 +66,6 @@ module ValuesSparseUnionArrayTests
def test_boolean
values = [
{"0" => true},
- nil,
{"1" => nil},
]
target = build(:boolean, values)
@@ -79,7 +75,6 @@ module ValuesSparseUnionArrayTests
def test_int8
values = [
{"0" => -(2 ** 7)},
- nil,
{"1" => nil},
]
target = build(:int8, values)
@@ -89,7 +84,6 @@ module ValuesSparseUnionArrayTests
def test_uint8
values = [
{"0" => (2 ** 8) - 1},
- nil,
{"1" => nil},
]
target = build(:uint8, values)
@@ -99,7 +93,6 @@ module ValuesSparseUnionArrayTests
def test_int16
values = [
{"0" => -(2 ** 15)},
- nil,
{"1" => nil},
]
target = build(:int16, values)
@@ -109,7 +102,6 @@ module ValuesSparseUnionArrayTests
def test_uint16
values = [
{"0" => (2 ** 16) - 1},
- nil,
{"1" => nil},
]
target = build(:uint16, values)
@@ -119,7 +111,6 @@ module ValuesSparseUnionArrayTests
def test_int32
values = [
{"0" => -(2 ** 31)},
- nil,
{"1" => nil},
]
target = build(:int32, values)
@@ -129,7 +120,6 @@ module ValuesSparseUnionArrayTests
def test_uint32
values = [
{"0" => (2 ** 32) - 1},
- nil,
{"1" => nil},
]
target = build(:uint32, values)
@@ -139,7 +129,6 @@ module ValuesSparseUnionArrayTests
def test_int64
values = [
{"0" => -(2 ** 63)},
- nil,
{"1" => nil},
]
target = build(:int64, values)
@@ -149,7 +138,6 @@ module ValuesSparseUnionArrayTests
def test_uint64
values = [
{"0" => (2 ** 64) - 1},
- nil,
{"1" => nil},
]
target = build(:uint64, values)
@@ -159,7 +147,6 @@ module ValuesSparseUnionArrayTests
def test_float
values = [
{"0" => -1.0},
- nil,
{"1" => nil},
]
target = build(:float, values)
@@ -169,7 +156,6 @@ module ValuesSparseUnionArrayTests
def test_double
values = [
{"0" => -1.0},
- nil,
{"1" => nil},
]
target = build(:double, values)
@@ -179,7 +165,6 @@ module ValuesSparseUnionArrayTests
def test_binary
values = [
{"0" => "\xff".b},
- nil,
{"1" => nil},
]
target = build(:binary, values)
@@ -189,7 +174,6 @@ module ValuesSparseUnionArrayTests
def test_string
values = [
{"0" => "Ruby"},
- nil,
{"1" => nil},
]
target = build(:string, values)
@@ -199,7 +183,6 @@ module ValuesSparseUnionArrayTests
def test_date32
values = [
{"0" => Date.new(1960, 1, 1)},
- nil,
{"1" => nil},
]
target = build(:date32, values)
@@ -209,7 +192,6 @@ module ValuesSparseUnionArrayTests
def test_date64
values = [
{"0" => DateTime.new(1960, 1, 1, 2, 9, 30)},
- nil,
{"1" => nil},
]
target = build(:date64, values)
@@ -219,7 +201,6 @@ module ValuesSparseUnionArrayTests
def test_timestamp_second
values = [
{"0" => Time.parse("1960-01-01T02:09:30Z")},
- nil,
{"1" => nil},
]
target = build({
@@ -233,7 +214,6 @@ module ValuesSparseUnionArrayTests
def test_timestamp_milli
values = [
{"0" => Time.parse("1960-01-01T02:09:30.123Z")},
- nil,
{"1" => nil},
]
target = build({
@@ -247,7 +227,6 @@ module ValuesSparseUnionArrayTests
def test_timestamp_micro
values = [
{"0" => Time.parse("1960-01-01T02:09:30.123456Z")},
- nil,
{"1" => nil},
]
target = build({
@@ -261,7 +240,6 @@ module ValuesSparseUnionArrayTests
def test_timestamp_nano
values = [
{"0" => Time.parse("1960-01-01T02:09:30.123456789Z")},
- nil,
{"1" => nil},
]
target = build({
@@ -277,7 +255,6 @@ module ValuesSparseUnionArrayTests
values = [
# 00:10:00
{"0" => Arrow::Time.new(unit, 60 * 10)},
- nil,
{"1" => nil},
]
target = build({
@@ -293,7 +270,6 @@ module ValuesSparseUnionArrayTests
values = [
# 00:10:00.123
{"0" => Arrow::Time.new(unit, (60 * 10) * 1000 + 123)},
- nil,
{"1" => nil},
]
target = build({
@@ -309,7 +285,6 @@ module ValuesSparseUnionArrayTests
values = [
# 00:10:00.123456
{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000 + 123_456)},
- nil,
{"1" => nil},
]
target = build({
@@ -325,7 +300,6 @@ module ValuesSparseUnionArrayTests
values = [
# 00:10:00.123456789
{"0" => Arrow::Time.new(unit, (60 * 10) * 1_000_000_000 + 123_456_789)},
- nil,
{"1" => nil},
]
target = build({
@@ -339,7 +313,6 @@ module ValuesSparseUnionArrayTests
def test_decimal128
values = [
{"0" => BigDecimal("92.92")},
- nil,
{"1" => nil},
]
target = build({
@@ -354,7 +327,6 @@ module ValuesSparseUnionArrayTests
def test_list
values = [
{"0" => [true, nil, false]},
- nil,
{"1" => nil},
]
target = build({
@@ -371,7 +343,6 @@ module ValuesSparseUnionArrayTests
def test_struct
values = [
{"0" => {"sub_field" => true}},
- nil,
{"1" => nil},
{"0" => {"sub_field" => nil}},
]
@@ -392,7 +363,6 @@ module ValuesSparseUnionArrayTests
omit("Need to add support for SparseUnionArrayBuilder")
values = [
{"0" => {"field1" => true}},
- nil,
{"1" => nil},
{"0" => {"field2" => nil}},
]
@@ -418,7 +388,6 @@ module ValuesSparseUnionArrayTests
omit("Need to add support for DenseUnionArrayBuilder")
values = [
{"0" => {"field1" => true}},
- nil,
{"1" => nil},
{"0" => {"field2" => nil}},
]
@@ -444,7 +413,6 @@ module ValuesSparseUnionArrayTests
omit("Need to add support for DictionaryArrayBuilder")
values = [
{"0" => "Ruby"},
- nil,
{"1" => nil},
{"0" => "GLib"},
]