This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 2e8e0c750b Support ListView/BinaryView/RunEndEncoded types in
integration test JSON parser (#9888)
2e8e0c750b is described below
commit 2e8e0c750b930c5bc3138d434c6007ffb7c22e61
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu May 14 16:14:17 2026 -0500
Support ListView/BinaryView/RunEndEncoded types in integration test JSON
parser (#9888)
# Which issue does this PR close?
Supports un-skipping more types in the Rust IPC/C Data integration tests
for https://github.com/apache/arrow/pull/49910 /
https://github.com/apache/arrow/issues/49744.
# Rationale for this change
View types and decimal 32/64 are supported in Rust but aren't supported
in the integration test JSON implementation (so they fail when the
integration test tries to check them).
# What changes are included in this PR?
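The integration test JSON parser now handles the JSON representation of
these types: `VIEWS` and `VARIADIC_DATA_BUFFERS` for view arrays, `SIZE`
alongside `OFFSET` for list views, and a two-child layout for
run-end-encoded arrays.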
# Are these changes tested?
Yes. I've added cases for the new types to the embedded integration.json,
and I've run the apache/arrow PR against this branch with these types no
longer skipped.
# Are there any user-facing changes?
No
---------
Co-authored-by: Copilot <[email protected]>
---
arrow-integration-test/data/integration.json | 239 +++++++++++++++++++++++++++
arrow-integration-test/src/datatype.rs | 29 +++-
arrow-integration-test/src/field.rs | 99 +++++++----
arrow-integration-test/src/lib.rs | 232 ++++++++++++++++++++++++++
4 files changed, 560 insertions(+), 39 deletions(-)
diff --git a/arrow-integration-test/data/integration.json
b/arrow-integration-test/data/integration.json
index 7e4a22cddb..fb14718894 100644
--- a/arrow-integration-test/data/integration.json
+++ b/arrow-integration-test/data/integration.json
@@ -319,6 +319,89 @@
"children": []
}
]
+ },
+ {
+ "name": "utf8views",
+ "type": {
+ "name": "utf8view"
+ },
+ "nullable": true,
+ "children": []
+ },
+ {
+ "name": "binaryviews",
+ "type": {
+ "name": "binaryview"
+ },
+ "nullable": true,
+ "children": []
+ },
+ {
+ "name": "listviews",
+ "type": {
+ "name": "listview"
+ },
+ "nullable": true,
+ "children": [
+ {
+ "name": "item",
+ "type": {
+ "name": "int",
+ "isSigned": true,
+ "bitWidth": 32
+ },
+ "nullable": true,
+ "children": []
+ }
+ ]
+ },
+ {
+ "name": "largelistviews",
+ "type": {
+ "name": "largelistview"
+ },
+ "nullable": true,
+ "children": [
+ {
+ "name": "item",
+ "type": {
+ "name": "int",
+ "isSigned": true,
+ "bitWidth": 32
+ },
+ "nullable": true,
+ "children": []
+ }
+ ]
+ },
+ {
+ "name": "runendencoded",
+ "type": {
+ "name": "runendencoded"
+ },
+ "nullable": true,
+ "children": [
+ {
+ "name": "run_ends",
+ "type": {
+ "name": "int",
+ "isSigned": true,
+ "bitWidth": 16
+ },
+ "nullable": false,
+ "children": []
+ },
+ {
+ "name": "values",
+ "type": {
+ "name": "int",
+ "isSigned": true,
+ "bitWidth": 32
+ },
+ "nullable": true,
+ "children": []
+ }
+ ]
}
]
},
@@ -801,6 +884,162 @@
]
}
]
+ },
+ {
+ "name": "utf8views",
+ "count": 3,
+ "VALIDITY": [
+ 1,
+ 0,
+ 1
+ ],
+ "VIEWS": [
+ {
+ "SIZE": 5,
+ "INLINED": "hello"
+ },
+ {
+ "SIZE": 0,
+ "INLINED": ""
+ },
+ {
+ "SIZE": 19,
+ "PREFIX_HEX": "74686973",
+ "BUFFER_INDEX": 0,
+ "OFFSET": 0
+ }
+ ],
+ "VARIADIC_DATA_BUFFERS": ["74686973206973206E6F7420696E6C696E6564"]
+ },
+ {
+ "name": "binaryviews",
+ "count": 3,
+ "VALIDITY": [
+ 1,
+ 1,
+ 0
+ ],
+ "VIEWS": [
+ {
+ "SIZE": 2,
+ "INLINED": "F34D"
+ },
+ {
+ "SIZE": 16,
+ "PREFIX_HEX": "00010203",
+ "BUFFER_INDEX": 0,
+ "OFFSET": 0
+ },
+ {
+ "SIZE": 0,
+ "INLINED": ""
+ }
+ ],
+ "VARIADIC_DATA_BUFFERS": ["000102030405060708090A0B0C0D0E0F"]
+ },
+ {
+ "name": "listviews",
+ "count": 3,
+ "VALIDITY": [
+ 1,
+ 0,
+ 1
+ ],
+ "OFFSET": [
+ 0,
+ 2,
+ 2
+ ],
+ "SIZE": [
+ 2,
+ 0,
+ 3
+ ],
+ "children": [
+ {
+ "name": "item",
+ "count": 5,
+ "VALIDITY": [
+ 1,
+ 1,
+ 1,
+ 0,
+ 1
+ ],
+ "DATA": [
+ 1,
+ 2,
+ 3,
+ 4,
+ 5
+ ]
+ }
+ ]
+ },
+ {
+ "name": "largelistviews",
+ "count": 3,
+ "VALIDITY": [
+ 1,
+ 1,
+ 0
+ ],
+ "OFFSET": [
+ "0",
+ "2",
+ "3"
+ ],
+ "SIZE": [
+ "2",
+ "1",
+ "0"
+ ],
+ "children": [
+ {
+ "name": "item",
+ "count": 3,
+ "VALIDITY": [
+ 1,
+ 0,
+ 1
+ ],
+ "DATA": [
+ 10,
+ 20,
+ 30
+ ]
+ }
+ ]
+ },
+ {
+ "name": "runendencoded",
+ "count": 3,
+ "children": [
+ {
+ "name": "run_ends",
+ "count": 2,
+ "VALIDITY": [
+ 1,
+ 1
+ ],
+ "DATA": [
+ 2,
+ 3
+ ]
+ },
+ {
+ "name": "values",
+ "count": 2,
+ "VALIDITY": [
+ 1,
+ 0
+ ],
+ "DATA": [
+ 100,
+ 200
+ ]
+ }
+ ]
}
]
}
diff --git a/arrow-integration-test/src/datatype.rs
b/arrow-integration-test/src/datatype.rs
index 4c17fbe76b..69174a1c22 100644
--- a/arrow-integration-test/src/datatype.rs
+++ b/arrow-integration-test/src/datatype.rs
@@ -29,6 +29,8 @@ pub fn data_type_from_json(json: &serde_json::Value) ->
Result<DataType> {
Some(s) if s == "bool" => Ok(DataType::Boolean),
Some(s) if s == "binary" => Ok(DataType::Binary),
Some(s) if s == "largebinary" => Ok(DataType::LargeBinary),
+ Some(s) if s == "binaryview" => Ok(DataType::BinaryView),
+ Some(s) if s == "utf8view" => Ok(DataType::Utf8View),
Some(s) if s == "utf8" => Ok(DataType::Utf8),
Some(s) if s == "largeutf8" => Ok(DataType::LargeUtf8),
Some(s) if s == "fixedsizebinary" => {
@@ -182,6 +184,14 @@ pub fn data_type_from_json(json: &serde_json::Value) ->
Result<DataType> {
// return a largelist with any type as its child isn't defined
in the map
Ok(DataType::LargeList(default_field))
}
+ Some(s) if s == "listview" => {
+ // return a listview with any type as its child isn't defined
in the map
+ Ok(DataType::ListView(default_field))
+ }
+ Some(s) if s == "largelistview" => {
+ // return a large listview with any type as its child isn't
defined in the map
+ Ok(DataType::LargeListView(default_field))
+ }
Some(s) if s == "fixedsizelist" => {
// return a list with any type as its child isn't defined in
the map
if let Some(Value::Number(size)) = map.get("listSize") {
@@ -199,6 +209,13 @@ pub fn data_type_from_json(json: &serde_json::Value) ->
Result<DataType> {
// return an empty `struct` type as its children aren't
defined in the map
Ok(DataType::Struct(Fields::empty()))
}
+ Some(s) if s == "runendencoded" => {
+ // return a run end encoded with placeholder types as children
aren't defined in the map
+ Ok(DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int32, false)),
+ default_field,
+ ))
+ }
Some(s) if s == "map" => {
if let Some(Value::Bool(keys_sorted)) = map.get("keysSorted") {
// Return a map with an empty type as its children aren't
defined in the map
@@ -271,9 +288,8 @@ pub fn data_type_to_json(data_type: &DataType) ->
serde_json::Value {
DataType::LargeUtf8 => json!({"name": "largeutf8"}),
DataType::Binary => json!({"name": "binary"}),
DataType::LargeBinary => json!({"name": "largebinary"}),
- DataType::BinaryView | DataType::Utf8View => {
- unimplemented!("BinaryView/Utf8View not implemented")
- }
+ DataType::BinaryView => json!({"name": "binaryview"}),
+ DataType::Utf8View => json!({"name": "utf8view"}),
DataType::FixedSizeBinary(byte_width) => {
json!({"name": "fixedsizebinary", "byteWidth": byte_width})
}
@@ -281,9 +297,8 @@ pub fn data_type_to_json(data_type: &DataType) ->
serde_json::Value {
DataType::Union(_, _) => json!({"name": "union"}),
DataType::List(_) => json!({ "name": "list"}),
DataType::LargeList(_) => json!({ "name": "largelist"}),
- DataType::ListView(_) | DataType::LargeListView(_) => {
- unimplemented!("ListView/LargeListView not implemented")
- }
+ DataType::ListView(_) => json!({ "name": "listview"}),
+ DataType::LargeListView(_) => json!({ "name": "largelistview"}),
DataType::FixedSizeList(_, length) => {
json!({"name":"fixedsizelist", "listSize": length})
}
@@ -352,7 +367,7 @@ pub fn data_type_to_json(data_type: &DataType) ->
serde_json::Value {
DataType::Map(_, keys_sorted) => {
json!({"name": "map", "keysSorted": keys_sorted})
}
- DataType::RunEndEncoded(_, _) => todo!(),
+ DataType::RunEndEncoded(_, _) => json!({"name": "runendencoded"}),
}
}
diff --git a/arrow-integration-test/src/field.rs
b/arrow-integration-test/src/field.rs
index 8b0ca264e0..2a32fa9fcb 100644
--- a/arrow-integration-test/src/field.rs
+++ b/arrow-integration-test/src/field.rs
@@ -114,43 +114,50 @@ pub fn field_from_json(json: &serde_json::Value) ->
Result<Field> {
// if data_type is a struct or list, get its children
let data_type = match data_type {
- DataType::List(_) | DataType::LargeList(_) |
DataType::FixedSizeList(_, _) => {
- match map.get("children") {
- Some(Value::Array(values)) => {
- if values.len() != 1 {
- return Err(ArrowError::ParseError(
- "Field 'children' must have one element
for a list data type"
- .to_string(),
- ));
- }
- match data_type {
- DataType::List(_) => {
-
DataType::List(Arc::new(field_from_json(&values[0])?))
- }
- DataType::LargeList(_) => {
-
DataType::LargeList(Arc::new(field_from_json(&values[0])?))
- }
- DataType::FixedSizeList(_, int) =>
DataType::FixedSizeList(
- Arc::new(field_from_json(&values[0])?),
- int,
- ),
- _ => unreachable!(
- "Data type should be a list, largelist or
fixedsizelist"
- ),
- }
- }
- Some(_) => {
+ DataType::List(_)
+ | DataType::LargeList(_)
+ | DataType::ListView(_)
+ | DataType::LargeListView(_)
+ | DataType::FixedSizeList(_, _) => match map.get("children") {
+ Some(Value::Array(values)) => {
+ if values.len() != 1 {
return Err(ArrowError::ParseError(
- "Field 'children' must be an
array".to_string(),
+ "Field 'children' must have one element for a
list data type"
+ .to_string(),
));
}
- None => {
- return Err(ArrowError::ParseError(
- "Field missing 'children'
attribute".to_string(),
- ));
+ match data_type {
+ DataType::List(_) => {
+
DataType::List(Arc::new(field_from_json(&values[0])?))
+ }
+ DataType::LargeList(_) => {
+
DataType::LargeList(Arc::new(field_from_json(&values[0])?))
+ }
+ DataType::ListView(_) => {
+
DataType::ListView(Arc::new(field_from_json(&values[0])?))
+ }
+ DataType::LargeListView(_) => {
+
DataType::LargeListView(Arc::new(field_from_json(&values[0])?))
+ }
+ DataType::FixedSizeList(_, int) => {
+
DataType::FixedSizeList(Arc::new(field_from_json(&values[0])?), int)
+ }
+ _ => unreachable!(
+ "Data type should be a list, largelist,
listview, largelistview or fixedsizelist"
+ ),
}
}
- }
+ Some(_) => {
+ return Err(ArrowError::ParseError(
+ "Field 'children' must be an array".to_string(),
+ ));
+ }
+ None => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'children' attribute".to_string(),
+ ));
+ }
+ },
DataType::Struct(_) => match map.get("children") {
Some(Value::Array(values)) => {
DataType::Struct(values.iter().map(field_from_json).collect::<Result<_>>()?)
@@ -215,6 +222,29 @@ pub fn field_from_json(json: &serde_json::Value) ->
Result<Field> {
));
}
},
+ DataType::RunEndEncoded(_, _) => match map.get("children") {
+ Some(Value::Array(values)) => {
+ if values.len() != 2 {
+ return Err(ArrowError::ParseError(
+ "Field 'children' must have exactly 2 elements
for RunEndEncoded"
+ .to_string(),
+ ));
+ }
+ let run_ends = Arc::new(field_from_json(&values[0])?);
+ let values_field =
Arc::new(field_from_json(&values[1])?);
+ DataType::RunEndEncoded(run_ends, values_field)
+ }
+ Some(_) => {
+ return Err(ArrowError::ParseError(
+ "Field 'children' must be an array".to_string(),
+ ));
+ }
+ None => {
+ return Err(ArrowError::ParseError(
+ "Field missing 'children' attribute".to_string(),
+ ));
+ }
+ },
_ => data_type,
};
@@ -269,8 +299,13 @@ pub fn field_to_json(field: &Field) -> serde_json::Value {
DataType::Struct(fields) => fields.iter().map(|x|
field_to_json(x.as_ref())).collect(),
DataType::List(field)
| DataType::LargeList(field)
+ | DataType::ListView(field)
+ | DataType::LargeListView(field)
| DataType::FixedSizeList(field, _)
| DataType::Map(field, _) => vec![field_to_json(field)],
+ DataType::RunEndEncoded(run_ends, values) => {
+ vec![field_to_json(run_ends), field_to_json(values)]
+ }
_ => vec![],
};
diff --git a/arrow-integration-test/src/lib.rs
b/arrow-integration-test/src/lib.rs
index 0f0b4fe2ff..e0aa3ecf85 100644
--- a/arrow-integration-test/src/lib.rs
+++ b/arrow-integration-test/src/lib.rs
@@ -203,6 +203,15 @@ pub struct ArrowJsonColumn {
/// The type id for union types
#[serde(rename = "TYPE_ID")]
pub type_id: Option<Vec<i8>>,
+ /// The sizes for ListView/LargeListView types
+ #[serde(rename = "SIZE")]
+ pub size: Option<Vec<Value>>,
+ /// The views for BinaryView/Utf8View types
+ #[serde(rename = "VIEWS")]
+ pub views: Option<Vec<Value>>,
+ /// The variadic data buffers for BinaryView/Utf8View types
+ #[serde(rename = "VARIADIC_DATA_BUFFERS")]
+ pub variadic_data_buffers: Option<Vec<String>>,
/// The children columns for nested types
pub children: Option<Vec<ArrowJsonColumn>>,
}
@@ -772,6 +781,66 @@ pub fn array_from_json(
.unwrap();
Ok(Arc::new(LargeListArray::from(list_data)))
}
+ DataType::ListView(child_field) => {
+ let null_buf = create_null_buf(&json_col);
+ let children = json_col.children.clone().unwrap();
+ let child_array = array_from_json(child_field,
children[0].clone(), dictionaries)?;
+ let offsets: Vec<i32> = json_col
+ .offset
+ .unwrap()
+ .iter()
+ .map(|v| v.as_i64().unwrap() as i32)
+ .collect();
+ let sizes: Vec<i32> = json_col
+ .size
+ .unwrap()
+ .iter()
+ .map(|v| v.as_i64().unwrap() as i32)
+ .collect();
+ let list_data = ArrayData::builder(field.data_type().clone())
+ .len(json_col.count)
+ .add_buffer(Buffer::from(offsets.to_byte_slice()))
+ .add_buffer(Buffer::from(sizes.to_byte_slice()))
+ .add_child_data(child_array.into_data())
+ .null_bit_buffer(Some(null_buf))
+ .build()
+ .unwrap();
+ Ok(Arc::new(ListViewArray::from(list_data)))
+ }
+ DataType::LargeListView(child_field) => {
+ let null_buf = create_null_buf(&json_col);
+ let children = json_col.children.clone().unwrap();
+ let child_array = array_from_json(child_field,
children[0].clone(), dictionaries)?;
+ let offsets: Vec<i64> = json_col
+ .offset
+ .unwrap()
+ .iter()
+ .map(|v| match v {
+ Value::Number(n) => n.as_i64().unwrap(),
+ Value::String(s) => s.parse::<i64>().unwrap(),
+ _ => panic!("64-bit offset must be either string or
number"),
+ })
+ .collect();
+ let sizes: Vec<i64> = json_col
+ .size
+ .unwrap()
+ .iter()
+ .map(|v| match v {
+ Value::Number(n) => n.as_i64().unwrap(),
+ Value::String(s) => s.parse::<i64>().unwrap(),
+ _ => panic!("64-bit size must be either string or number"),
+ })
+ .collect();
+ let list_data = ArrayData::builder(field.data_type().clone())
+ .len(json_col.count)
+ .add_buffer(Buffer::from(offsets.to_byte_slice()))
+ .add_buffer(Buffer::from(sizes.to_byte_slice()))
+ .add_child_data(child_array.into_data())
+ .null_bit_buffer(Some(null_buf))
+ .build()
+ .unwrap();
+ Ok(Arc::new(LargeListViewArray::from(list_data)))
+ }
DataType::FixedSizeList(child_field, _) => {
let children = json_col.children.clone().unwrap();
let child_array = array_from_json(child_field,
children[0].clone(), dictionaries)?;
@@ -953,6 +1022,86 @@ pub fn array_from_json(
UnionArray::try_new(fields.clone(), type_ids.into(), offset,
children).unwrap();
Ok(Arc::new(array))
}
+ DataType::Utf8View => {
+ let views = json_col.views.ok_or_else(|| {
+ ArrowError::JsonError("Utf8View requires VIEWS
field".to_string())
+ })?;
+ let variadic_buffers =
json_col.variadic_data_buffers.unwrap_or_default();
+ let validity = json_col.validity.as_ref();
+
+ let mut builder = StringViewBuilder::new();
+ for (i, view) in views.iter().enumerate() {
+ let is_valid = validity.map_or(1, |v| v[i]);
+ if is_valid == 0 {
+ builder.append_null();
+ } else {
+ let view_obj = view.as_object().unwrap();
+ let size = view_obj["SIZE"].as_u64().unwrap() as usize;
+ // Check for INLINED key presence - inlined if SIZE <= 12
+ if let Some(inlined) = view_obj.get("INLINED") {
+ builder.append_value(inlined.as_str().unwrap());
+ } else {
+ // Reference to variadic buffer
+ let buffer_index =
view_obj["BUFFER_INDEX"].as_u64().unwrap() as usize;
+ let offset = view_obj["OFFSET"].as_u64().unwrap() as
usize;
+ let buffer_data =
hex::decode(&variadic_buffers[buffer_index]).unwrap();
+ let s =
std::str::from_utf8(&buffer_data[offset..offset + size]).unwrap();
+ builder.append_value(s);
+ }
+ }
+ }
+ Ok(Arc::new(builder.finish()))
+ }
+ DataType::BinaryView => {
+ let views = json_col.views.ok_or_else(|| {
+ ArrowError::JsonError("BinaryView requires VIEWS
field".to_string())
+ })?;
+ let variadic_buffers =
json_col.variadic_data_buffers.unwrap_or_default();
+ let validity = json_col.validity.as_ref();
+
+ let mut builder = BinaryViewBuilder::new();
+ for (i, view) in views.iter().enumerate() {
+ let is_valid = validity.map_or(1, |v| v[i]);
+ if is_valid == 0 {
+ builder.append_null();
+ } else {
+ let view_obj = view.as_object().unwrap();
+ let size = view_obj["SIZE"].as_u64().unwrap() as usize;
+ // Check for INLINED key presence - inlined if SIZE <= 12
+ if let Some(inlined) = view_obj.get("INLINED") {
+ let data =
hex::decode(inlined.as_str().unwrap()).unwrap();
+ builder.append_value(&data);
+ } else {
+ // Reference to variadic buffer
+ let buffer_index =
view_obj["BUFFER_INDEX"].as_u64().unwrap() as usize;
+ let offset = view_obj["OFFSET"].as_u64().unwrap() as
usize;
+ let buffer_data =
hex::decode(&variadic_buffers[buffer_index]).unwrap();
+ builder.append_value(&buffer_data[offset..offset +
size]);
+ }
+ }
+ }
+ Ok(Arc::new(builder.finish()))
+ }
+ DataType::RunEndEncoded(run_ends_field, values_field) => {
+ let children = json_col.children.clone().unwrap();
+ if children.len() != 2 {
+ return Err(ArrowError::JsonError(
+ "RunEndEncoded requires exactly 2 children".to_string(),
+ ));
+ }
+ let run_ends_array =
+ array_from_json(run_ends_field, children[0].clone(),
dictionaries)?;
+ let values_array = array_from_json(values_field,
children[1].clone(), dictionaries)?;
+
+ let run_array_data = ArrayData::builder(field.data_type().clone())
+ .len(json_col.count)
+ .add_child_data(run_ends_array.into_data())
+ .add_child_data(values_array.into_data())
+ .build()
+ .unwrap();
+
+ Ok(make_array(run_array_data))
+ }
t => Err(ArrowError::JsonError(format!(
"data type {t} not supported"
))),
@@ -1092,6 +1241,9 @@ impl ArrowJsonBatch {
data: Some(data),
offset: None,
type_id: None,
+ size: None,
+ views: None,
+ variadic_data_buffers: None,
children: None,
}
}
@@ -1102,6 +1254,9 @@ impl ArrowJsonBatch {
data: None,
offset: None,
type_id: None,
+ size: None,
+ views: None,
+ variadic_data_buffers: None,
children: None,
},
};
@@ -1265,6 +1420,26 @@ mod tests {
])),
true,
),
+ Field::new("utf8views", DataType::Utf8View, true),
+ Field::new("binaryviews", DataType::BinaryView, true),
+ Field::new(
+ "listviews",
+
DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32, true))),
+ true,
+ ),
+ Field::new(
+ "largelistviews",
+
DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int32, true))),
+ true,
+ ),
+ Field::new(
+ "runendencoded",
+ DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int16, false)),
+ Arc::new(Field::new("values", DataType::Int32, true)),
+ ),
+ true,
+ ),
]);
let bools_with_metadata_map = BooleanArray::from(vec![Some(true),
None, Some(false)]);
@@ -1336,6 +1511,58 @@ mod tests {
.unwrap();
let structs = StructArray::from(struct_data);
+ let utf8views =
+ StringViewArray::from(vec![Some("hello"), None, Some("this is not
inlined")]);
+ let binaryviews = BinaryViewArray::from_iter(vec![
+ Some(b"\xf3\x4d".as_slice()),
+
Some(b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f".as_slice()),
+ None,
+ ]);
+
+ let listview_value_data = Int32Array::from(vec![Some(1), Some(2),
Some(3), None, Some(5)]);
+ let listview_offsets = Buffer::from_slice_ref([0i32, 2, 2]);
+ let listview_sizes = Buffer::from_slice_ref([2i32, 0, 3]);
+ let listview_data_type =
+ DataType::ListView(Arc::new(Field::new_list_field(DataType::Int32,
true)));
+ let listview_data = ArrayData::builder(listview_data_type)
+ .len(3)
+ .add_buffer(listview_offsets)
+ .add_buffer(listview_sizes)
+ .add_child_data(listview_value_data.into_data())
+ .null_bit_buffer(Some(Buffer::from([0b00000101])))
+ .build()
+ .unwrap();
+ let listviews = ListViewArray::from(listview_data);
+
+ let largelistview_value_data = Int32Array::from(vec![Some(10), None,
Some(30)]);
+ let largelistview_offsets = Buffer::from_slice_ref([0i64, 2, 3]);
+ let largelistview_sizes = Buffer::from_slice_ref([2i64, 1, 0]);
+ let largelistview_data_type =
+
DataType::LargeListView(Arc::new(Field::new_list_field(DataType::Int32, true)));
+ let largelistview_data = ArrayData::builder(largelistview_data_type)
+ .len(3)
+ .add_buffer(largelistview_offsets)
+ .add_buffer(largelistview_sizes)
+ .add_child_data(largelistview_value_data.into_data())
+ .null_bit_buffer(Some(Buffer::from([0b00000011])))
+ .build()
+ .unwrap();
+ let largelistviews = LargeListViewArray::from(largelistview_data);
+
+ let ree_run_ends = Int16Array::from(vec![2, 3]);
+ let ree_values = Int32Array::from(vec![Some(100), None]);
+ let ree_data_type = DataType::RunEndEncoded(
+ Arc::new(Field::new("run_ends", DataType::Int16, false)),
+ Arc::new(Field::new("values", DataType::Int32, true)),
+ );
+ let ree_data = ArrayData::builder(ree_data_type)
+ .len(3)
+ .add_child_data(ree_run_ends.into_data())
+ .add_child_data(ree_values.into_data())
+ .build()
+ .unwrap();
+ let runendencoded = RunArray::<Int16Type>::from(ree_data);
+
let record_batch = RecordBatch::try_new(
Arc::new(schema.clone()),
vec![
@@ -1369,6 +1596,11 @@ mod tests {
Arc::new(utf8s),
Arc::new(lists),
Arc::new(structs),
+ Arc::new(utf8views),
+ Arc::new(binaryviews),
+ Arc::new(listviews),
+ Arc::new(largelistviews),
+ Arc::new(runendencoded),
],
)
.unwrap();