This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 6412c3aaa0 Add end-to-end Parquet tests for List and LargeList struct
schema evolution (#20840)
6412c3aaa0 is described below
commit 6412c3aaa0e7187b20bb219856f594f1c5e69fef
Author: kosiew <[email protected]>
AuthorDate: Tue Mar 31 17:07:19 2026 +0800
Add end-to-end Parquet tests for List and LargeList struct schema evolution
(#20840)
## Which issue does this PR close?
* Part of #20835
## Rationale for this change
While the core fixes for nested struct schema evolution have landed in
#20907, existing coverage is primarily at the unit/helper level. This PR
adds end-to-end Parquet-based integration tests to validate that
List<Struct> and LargeList<Struct> schema evolution behaves correctly
through the full execution pipeline (planning, scanning, and
projection).
This ensures that real-world query paths such as `SELECT *` and nested
field projection behave consistently and that previous repro cases are
no longer failing.
## What changes are included in this PR?
### 1. End-to-end Rust integration tests
Added comprehensive tests in:
* `datafusion/core/tests/parquet/expr_adapter.rs`
These tests:
* Generate old/new Parquet files with differing nested struct schemas
* Cover both `List<Struct<...>>` and `LargeList<Struct<...>>`
* Validate:
  * `SELECT *` correctness
  * Nested field projection via `get_field(...)`
  * NULL backfilling for missing nullable fields
  * Ignoring extra source-only fields
### 2. Error-path coverage
Added failure tests for both `List` and `LargeList`:
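* Non-nullable target field (`chain`) that the source file cannot satisfy → error (note: the test fixture still writes `chain` as a nullable source field rather than omitting it)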
* Incompatible nested field type → error
This ensures parity across both list encodings and prevents regressions
that affect only one of them.
### 3. Test utilities and refactoring
Introduced reusable helpers to simplify nested test setup:
* `NestedListKind` abstraction for List vs LargeList
* `NestedMessageRow` test fixture struct
* Batch builders and schema helpers
* Macro `test_struct_schema_evolution_pair!` to generate paired tests
These reduce duplication and make it easier to extend the test matrix.
### 4. End-user API coverage via `.slt`
Added:
* `datafusion/sqllogictest/test_files/schema_evolution_nested.slt`
This validates behavior through SQL-only workflows:
* Uses `COPY ... TO PARQUET` to generate test files
* Uses `CREATE EXTERNAL TABLE` to query them
Covers:
* Mixed-schema reads
* Nested projection queries
* Both `List` and `LargeList`
---
## Are these changes tested?
Yes.
This PR adds both:
1. **Rust integration tests**
   * End-to-end Parquet scan behavior
   * Success and failure scenarios
   * Covers both `List` and `LargeList`
2. **sqllogictest (`.slt`) tests**
   * Validates behavior through the end-user SQL interface
   * Uses generated Parquet fixtures (no checked-in binaries)
All tests pass locally, including:
* `test_list_struct_schema_evolution_end_to_end`
* `test_large_list_struct_schema_evolution_end_to_end`
* Error-path variants for both list encodings
## Are there any user-facing changes?
No direct user-facing changes.
This PR improves correctness guarantees and test coverage for nested
schema evolution, ensuring more predictable behavior for users working
with evolving Parquet schemas.
## LLM-generated code disclosure
This PR includes LLM-generated code and comments. All LLM-generated
content has been manually reviewed and tested.
---
datafusion/core/tests/parquet/expr_adapter.rs | 524 ++++++++++++++++++++-
.../test_files/schema_evolution_nested.slt | 124 +++++
2 files changed, 646 insertions(+), 2 deletions(-)
diff --git a/datafusion/core/tests/parquet/expr_adapter.rs
b/datafusion/core/tests/parquet/expr_adapter.rs
index f412cdf9bd..cf32efbd70 100644
--- a/datafusion/core/tests/parquet/expr_adapter.rs
+++ b/datafusion/core/tests/parquet/expr_adapter.rs
@@ -18,9 +18,11 @@
use std::sync::Arc;
use arrow::array::{
- Array, ArrayRef, BooleanArray, Int32Array, Int64Array, RecordBatch,
StringArray,
- StructArray, record_batch,
+ Array, ArrayRef, BooleanArray, Int32Array, Int64Array, LargeListArray,
ListArray,
+ RecordBatch, StringArray, StructArray, record_batch,
};
+use arrow::buffer::OffsetBuffer;
+use arrow::compute::concat_batches;
use arrow_schema::{DataType, Field, Fields, Schema, SchemaRef};
use bytes::{BufMut, BytesMut};
use datafusion::assert_batches_eq;
@@ -54,6 +56,405 @@ async fn write_parquet(batch: RecordBatch, store: Arc<dyn
ObjectStore>, path: &s
store.put(&Path::from(path), data.into()).await.unwrap();
}
+#[derive(Debug, Clone, Copy)]
+enum NestedListKind {
+ List,
+ LargeList,
+}
+
+impl NestedListKind {
+ fn field_data_type(self, item_field: Arc<Field>) -> DataType {
+ match self {
+ Self::List => DataType::List(item_field),
+ Self::LargeList => DataType::LargeList(item_field),
+ }
+ }
+
+ fn array(
+ self,
+ item_field: Arc<Field>,
+ lengths: Vec<usize>,
+ values: ArrayRef,
+ ) -> ArrayRef {
+ match self {
+ Self::List => Arc::new(ListArray::new(
+ item_field,
+ OffsetBuffer::<i32>::from_lengths(lengths),
+ values,
+ None,
+ )),
+ Self::LargeList => Arc::new(LargeListArray::new(
+ item_field,
+ OffsetBuffer::<i64>::from_lengths(lengths),
+ values,
+ None,
+ )),
+ }
+ }
+
+ fn name(self) -> &'static str {
+ match self {
+ Self::List => "list",
+ Self::LargeList => "large_list",
+ }
+ }
+}
+
+#[derive(Debug)]
+// Fixture row for one nested struct element inside the `messages` list column.
+struct NestedMessageRow<'a> {
+ id: i32,
+ name: &'a str,
+ chain: Option<&'a str>,
+ ignored: Option<i32>,
+}
+
+fn message_fields(
+ chain_type: DataType,
+ chain_nullable: bool,
+ include_chain: bool,
+ include_ignored: bool,
+) -> Fields {
+ let mut fields = vec![
+ Arc::new(Field::new("id", DataType::Int32, false)),
+ Arc::new(Field::new("name", DataType::Utf8, true)),
+ ];
+ if include_chain {
+ fields.push(Arc::new(Field::new("chain", chain_type, chain_nullable)));
+ }
+ if include_ignored {
+ fields.push(Arc::new(Field::new("ignored", DataType::Int32, true)));
+ }
+ fields.into()
+}
+
+// Helper to construct the target message schema for struct evolution tests.
+// The schema always has id (Int64), name (Utf8), and chain with parameterized
type.
+fn target_message_fields(chain_type: DataType, chain_nullable: bool) -> Fields
{
+ vec![
+ Arc::new(Field::new("id", DataType::Int64, false)),
+ Arc::new(Field::new("name", DataType::Utf8, true)),
+ Arc::new(Field::new("chain", chain_type, chain_nullable)),
+ ]
+ .into()
+}
+
+// Helper to build message columns in canonical order (id, name, chain,
ignored)
+// based on which optional fields are present in the schema.
+fn build_message_columns(
+ id_array: &ArrayRef,
+ name_array: &ArrayRef,
+ chain_vec: &[Option<&str>],
+ ignored_array: &ArrayRef,
+ fields: &Fields,
+) -> Vec<ArrayRef> {
+ let mut columns = vec![Arc::clone(id_array), Arc::clone(name_array)];
+
+ for field in fields.iter().skip(2) {
+ match field.name().as_str() {
+ "chain" => {
+ let chain_array = match field.data_type() {
+ DataType::Utf8 => {
+ Arc::new(StringArray::from(chain_vec.to_vec())) as
ArrayRef
+ }
+ DataType::Struct(chain_fields) => {
+ let chain_struct = StructArray::new(
+ chain_fields.clone(),
+
vec![Arc::new(StringArray::from(chain_vec.to_vec()))
+ as ArrayRef],
+ None,
+ );
+ Arc::new(chain_struct) as ArrayRef
+ }
+ other => panic!("unexpected chain field type: {other:?}"),
+ };
+ columns.push(chain_array);
+ }
+ "ignored" => columns.push(Arc::clone(ignored_array)),
+ _ => {}
+ }
+ }
+ columns
+}
+
+fn nested_messages_batch(
+ kind: NestedListKind,
+ row_id: i32,
+ messages: &[NestedMessageRow<'_>],
+ fields: &Fields,
+) -> RecordBatch {
+ let item_field = Arc::new(Field::new("item",
DataType::Struct(fields.clone()), true));
+
+ let (ids_vec, names_vec, chain_vec, ignored_vec) = messages.iter().fold(
+ (
+ Vec::with_capacity(messages.len()),
+ Vec::with_capacity(messages.len()),
+ Vec::with_capacity(messages.len()),
+ Vec::with_capacity(messages.len()),
+ ),
+ |(mut ids, mut names, mut chains, mut ignoreds), msg| {
+ ids.push(msg.id);
+ names.push(Some(msg.name));
+ chains.push(msg.chain);
+ ignoreds.push(msg.ignored);
+ (ids, names, chains, ignoreds)
+ },
+ );
+
+ // Build all arrays once
+ let id_array = Arc::new(Int32Array::from(ids_vec)) as ArrayRef;
+ let name_array = Arc::new(StringArray::from(names_vec)) as ArrayRef;
+ let ignored_array = Arc::new(Int32Array::from(ignored_vec)) as ArrayRef;
+
+ // Build columns in canonical order (id, name, chain, ignored) based on
field schema
+ let columns =
+ build_message_columns(&id_array, &name_array, &chain_vec,
&ignored_array, fields);
+
+ let struct_array = StructArray::new(fields.clone(), columns, None);
+
+ // Compute the message data type first, then move item_field into
kind.array()
+ let message_data_type = kind.field_data_type(item_field.clone());
+ let messages_array =
+ kind.array(item_field, vec![messages.len()], Arc::new(struct_array));
+ let schema = Arc::new(Schema::new(vec![
+ Field::new("row_id", DataType::Int32, false),
+ Field::new("messages", message_data_type, true),
+ ]));
+
+ RecordBatch::try_new(
+ schema,
+ vec![
+ Arc::new(Int32Array::from(vec![row_id])) as ArrayRef,
+ messages_array,
+ ],
+ )
+ .unwrap()
+}
+
+async fn register_memory_listing_table(
+ ctx: &SessionContext,
+ store: Arc<dyn ObjectStore>,
+ base_path: &str,
+ table_schema: SchemaRef,
+) {
+ let store_url = ObjectStoreUrl::parse("memory://").unwrap();
+ ctx.register_object_store(store_url.as_ref(), Arc::clone(&store));
+
+ let listing_table_config =
+ ListingTableConfig::new(ListingTableUrl::parse(base_path).unwrap())
+ .infer_options(&ctx.state())
+ .await
+ .unwrap()
+ .with_schema(table_schema)
+
.with_expr_adapter_factory(Arc::new(DefaultPhysicalExprAdapterFactory));
+
+ let table = ListingTable::try_new(listing_table_config).unwrap();
+ ctx.register_table("t", Arc::new(table)).unwrap();
+}
+
+fn test_context() -> SessionContext {
+ let mut cfg = SessionConfig::new()
+ .with_collect_statistics(false)
+ .with_parquet_pruning(false)
+ .with_parquet_page_index_pruning(false);
+ cfg.options_mut().execution.parquet.pushdown_filters = true;
+ SessionContext::new_with_config(cfg)
+}
+
+fn nested_list_table_schema(
+ kind: NestedListKind,
+ target_message_fields: Fields,
+) -> SchemaRef {
+ let target_item = Arc::new(Field::new(
+ "item",
+ DataType::Struct(target_message_fields),
+ true,
+ ));
+ Arc::new(Schema::new(vec![
+ Field::new("row_id", DataType::Int32, false),
+ Field::new("messages", kind.field_data_type(target_item), true),
+ ]))
+}
+
+// Helper to extract message values from a nested list column.
+// Returns the values at indices 0 and 1 from either a ListArray or
LargeListArray.
+fn extract_nested_list_values(
+ kind: NestedListKind,
+ column: &ArrayRef,
+) -> (ArrayRef, ArrayRef) {
+ match kind {
+ NestedListKind::List => {
+ let list = column
+ .as_any()
+ .downcast_ref::<ListArray>()
+ .expect("messages should be a ListArray");
+ (list.value(0), list.value(1))
+ }
+ NestedListKind::LargeList => {
+ let list = column
+ .as_any()
+ .downcast_ref::<LargeListArray>()
+ .expect("messages should be a LargeListArray");
+ (list.value(0), list.value(1))
+ }
+ }
+}
+
+// Helper to set up a nested list test fixture.
+// Creates an in-memory store, writes the provided batches to parquet files,
+// creates a SessionContext, and registers the resulting table.
+// Returns the prepared context ready for queries.
+async fn setup_nested_list_test(
+ kind: NestedListKind,
+ prefix_base: &str,
+ batches: Vec<(String, RecordBatch)>,
+ table_schema: SchemaRef,
+) -> SessionContext {
+ let store = Arc::new(InMemory::new()) as Arc<dyn ObjectStore>;
+ let prefix = format!("{}_{}", kind.name(), prefix_base);
+
+ for (filename, batch) in batches {
+ write_parquet(batch, Arc::clone(&store),
&format!("{prefix}/{filename}")).await;
+ }
+
+ let ctx = test_context();
+ register_memory_listing_table(
+ &ctx,
+ Arc::clone(&store),
+ &format!("memory:///{prefix}/"),
+ table_schema,
+ )
+ .await;
+
+ ctx
+}
+
+async fn assert_nested_list_struct_schema_evolution(kind: NestedListKind) ->
Result<()> {
+ // old.parquet shape: messages item struct has only (id, name), no `chain`.
+ let old_batch = nested_messages_batch(
+ kind,
+ 1,
+ &[
+ NestedMessageRow {
+ id: 10,
+ name: "alpha",
+ chain: None,
+ ignored: None,
+ },
+ NestedMessageRow {
+ id: 20,
+ name: "beta",
+ chain: None,
+ ignored: None,
+ },
+ ],
+ &message_fields(DataType::Utf8, true, false, false),
+ );
+
+ // new.parquet shape: messages item struct adds nullable `chain` and extra
`ignored`.
+ let new_batch = nested_messages_batch(
+ kind,
+ 2,
+ &[NestedMessageRow {
+ id: 30,
+ name: "gamma",
+ chain: Some("eth"),
+ ignored: Some(99),
+ }],
+ &message_fields(DataType::Utf8, true, true, true),
+ );
+
+ // Logical table schema expects evolved shape (id, name, nullable `chain`)
and
+ // should ignore source-only `ignored` during reads.
+ let table_schema =
+ nested_list_table_schema(kind, target_message_fields(DataType::Utf8,
true));
+
+ let ctx = setup_nested_list_test(
+ kind,
+ "struct_evolution",
+ vec![
+ ("old.parquet".to_string(), old_batch),
+ ("new.parquet".to_string(), new_batch),
+ ],
+ table_schema,
+ )
+ .await;
+
+ let select_all = ctx
+ .sql("SELECT * FROM t ORDER BY row_id")
+ .await?
+ .collect()
+ .await?;
+ let all_rows = concat_batches(&select_all[0].schema(), &select_all)?;
+
+ let row_ids = all_rows
+ .column(0)
+ .as_any()
+ .downcast_ref::<Int32Array>()
+ .expect("row_id should be Int32");
+ assert_eq!(row_ids.values(), &[1, 2]);
+
+ let (messages0, messages1) = extract_nested_list_values(kind,
all_rows.column(1));
+
+ let messages0 = messages0
+ .as_any()
+ .downcast_ref::<StructArray>()
+ .expect("messages[0] should be a StructArray");
+ let old_ids = messages0
+ .column_by_name("id")
+ .unwrap()
+ .as_any()
+ .downcast_ref::<Int64Array>()
+ .unwrap();
+ assert_eq!(old_ids.values(), &[10, 20]);
+ let old_chain = messages0
+ .column_by_name("chain")
+ .unwrap()
+ .as_any()
+ .downcast_ref::<StringArray>()
+ .unwrap();
+ assert_eq!(old_chain.iter().collect::<Vec<_>>(), vec![None, None]);
+
+ let messages1 = messages1
+ .as_any()
+ .downcast_ref::<StructArray>()
+ .expect("messages[1] should be a StructArray");
+ assert!(
+ messages1.column_by_name("ignored").is_none(),
+ "extra source fields should not appear in the logical schema"
+ );
+ let new_chain = messages1
+ .column_by_name("chain")
+ .unwrap()
+ .as_any()
+ .downcast_ref::<StringArray>()
+ .unwrap();
+ assert_eq!(new_chain.iter().collect::<Vec<_>>(), vec![Some("eth")]);
+
+ let projected = ctx
+ .sql(
+ "SELECT row_id, get_field(messages[1], 'id') AS msg_id, \
+ get_field(messages[1], 'chain') AS chain \
+ FROM t ORDER BY row_id",
+ )
+ .await?
+ .collect()
+ .await?;
+
+ #[rustfmt::skip]
+ let expected = [
+ "+--------+--------+-------+",
+ "| row_id | msg_id | chain |",
+ "+--------+--------+-------+",
+ "| 1 | 10 | |",
+ "| 2 | 30 | eth |",
+ "+--------+--------+-------+",
+ ];
+ assert_batches_eq!(expected, &projected);
+
+ Ok(())
+}
+
// Implement a custom PhysicalExprAdapterFactory that fills in missing columns
with
// the default value for the field type:
// - Int64 columns are filled with `1`
@@ -462,6 +863,125 @@ async fn
test_struct_schema_evolution_projection_and_filter() -> Result<()> {
Ok(())
}
+/// Macro to generate paired test functions for List and LargeList variants.
+/// Expands to two `#[tokio::test]` functions with the specified names.
+macro_rules! test_struct_schema_evolution_pair {
+ (
+ list: $list_test:ident,
+ large_list: $large_list_test:ident,
+ fn: $assertion_fn:path $(, args: $($arg:expr),+)?
+ ) => {
+ #[tokio::test]
+ async fn $list_test() {
+ $assertion_fn(NestedListKind::List $(, $($arg),+)?).await;
+ }
+
+ #[tokio::test]
+ async fn $large_list_test() {
+ $assertion_fn(NestedListKind::LargeList $(, $($arg),+)?).await;
+ }
+ };
+ (
+ list: $list_test:ident,
+ large_list: $large_list_test:ident,
+ fn_result: $assertion_fn:path
+ ) => {
+ #[tokio::test]
+ async fn $list_test() -> Result<()> {
+ $assertion_fn(NestedListKind::List).await
+ }
+
+ #[tokio::test]
+ async fn $large_list_test() -> Result<()> {
+ $assertion_fn(NestedListKind::LargeList).await
+ }
+ };
+}
+
+test_struct_schema_evolution_pair!(
+ list: test_list_struct_schema_evolution_end_to_end,
+ large_list: test_large_list_struct_schema_evolution_end_to_end,
+ fn_result: assert_nested_list_struct_schema_evolution
+);
+
+async fn assert_nested_list_struct_schema_evolution_errors(
+ kind: NestedListKind,
+ chain_type: DataType,
+ chain_nullable: bool,
+ expected_error: &str,
+) {
+ let batch = nested_messages_batch(
+ kind,
+ 1,
+ &[NestedMessageRow {
+ id: 10,
+ name: "alpha",
+ chain: Some("eth"),
+ ignored: None,
+ }],
+ &message_fields(DataType::Utf8, true, true, false),
+ );
+
+ let table_schema =
+ nested_list_table_schema(kind, target_message_fields(chain_type,
chain_nullable));
+
+ let ctx = setup_nested_list_test(
+ kind,
+ "struct_evolution_error",
+ vec![("data.parquet".to_string(), batch)],
+ table_schema,
+ )
+ .await;
+
+ let err = ctx
+ .sql("SELECT * FROM t")
+ .await
+ .unwrap()
+ .collect()
+ .await
+ .unwrap_err();
+ assert!(
+ err.to_string().contains(expected_error),
+ "expected error to contain '{expected_error}', got: {err}"
+ );
+}
+
+async fn assert_non_nullable_missing_chain_field_fails(kind: NestedListKind) {
+ assert_nested_list_struct_schema_evolution_errors(
+ kind,
+ DataType::Utf8,
+ false,
+ "non-nullable",
+ )
+ .await;
+}
+
+async fn assert_incompatible_chain_field_fails(kind: NestedListKind) {
+ assert_nested_list_struct_schema_evolution_errors(
+ kind,
+ incompatible_chain_type(),
+ true,
+ "Cannot cast struct field 'chain'",
+ )
+ .await;
+}
+
+fn incompatible_chain_type() -> DataType {
+ DataType::Struct(vec![Arc::new(Field::new("value", DataType::Utf8,
true))].into())
+}
+
+test_struct_schema_evolution_pair!(
+ list: test_list_struct_schema_evolution_non_nullable_missing_field_fails,
+ large_list:
test_large_list_struct_schema_evolution_non_nullable_missing_field_fails,
+ fn: assert_non_nullable_missing_chain_field_fails
+);
+
+test_struct_schema_evolution_pair!(
+ list: test_list_struct_schema_evolution_incompatible_field_fails,
+ large_list:
test_large_list_struct_schema_evolution_incompatible_field_fails,
+ fn: assert_incompatible_chain_field_fails
+);
+
/// Test demonstrating that a single PhysicalExprAdapterFactory instance can be
/// reused across multiple ListingTable instances.
///
diff --git a/datafusion/sqllogictest/test_files/schema_evolution_nested.slt
b/datafusion/sqllogictest/test_files/schema_evolution_nested.slt
new file mode 100644
index 0000000000..53bc16fe51
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/schema_evolution_nested.slt
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##########
+# End-user-facing happy-path coverage for nested list/struct Parquet schema
evolution.
+#
+# These tests generate mixed-schema parquet files through SQL COPY statements
and
+# query them through CREATE EXTERNAL TABLE, rather than constructing batches
+# directly in Rust. Rust tests still cover more specialized fixture shapes and
+# failure paths.
+##########
+
+statement ok
+CREATE EXTERNAL TABLE list_messages (
+ row_id INT,
+ messages ARRAY<STRUCT<id BIGINT, name VARCHAR, chain VARCHAR>>
+)
+STORED AS PARQUET
+LOCATION 'test_files/scratch/schema_evolution_nested/list_messages/';
+
+statement ok
+COPY (
+ SELECT
+ 1 AS row_id,
+ [
+ named_struct('id', 10, 'name', 'alpha'),
+ named_struct('id', 20, 'name', 'beta')
+ ] AS messages
+) TO 'test_files/scratch/schema_evolution_nested/list_messages/old.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+ SELECT
+ 2 AS row_id,
+ [
+ named_struct('id', 30, 'name', 'gamma', 'chain', 'eth', 'ignored', 99)
+ ] AS messages
+) TO 'test_files/scratch/schema_evolution_nested/list_messages/new.parquet'
+STORED AS PARQUET;
+
+query I?
+SELECT row_id, messages FROM list_messages ORDER BY row_id;
+----
+1 [{id: 10, name: alpha, chain: NULL}, {id: 20, name: beta, chain: NULL}]
+2 [{id: 30, name: gamma, chain: eth}]
+
+query IIT rowsort
+SELECT
+ row_id,
+ get_field(messages[1], 'id') AS msg_id,
+ get_field(messages[1], 'chain') AS chain
+FROM list_messages;
+----
+1 10 NULL
+2 30 eth
+
+statement ok
+COPY (
+ SELECT
+ 1 AS row_id,
+ arrow_cast(
+ [
+ named_struct('id', 10, 'name', 'alpha'),
+ named_struct('id', 20, 'name', 'beta')
+ ],
+ 'LargeList(Struct("id": Int64, "name": Utf8View))'
+ ) AS messages
+) TO
'test_files/scratch/schema_evolution_nested/large_list_messages/old.parquet'
+STORED AS PARQUET;
+
+statement ok
+COPY (
+ SELECT
+ 2 AS row_id,
+ arrow_cast(
+ [
+ named_struct('id', 30, 'name', 'gamma', 'chain', 'eth', 'ignored', 99)
+ ],
+ 'LargeList(Struct("id": Int64, "name": Utf8View, "chain": Utf8View,
"ignored": Int64))'
+ ) AS messages
+) TO
'test_files/scratch/schema_evolution_nested/large_list_messages/new.parquet'
+STORED AS PARQUET;
+
+statement ok
+CREATE EXTERNAL TABLE large_list_messages
+STORED AS PARQUET
+LOCATION 'test_files/scratch/schema_evolution_nested/large_list_messages/';
+
+query TTT
+DESCRIBE large_list_messages;
+----
+row_id Int64 NO
+messages LargeList(Struct("id": Int64, "name": Utf8View, "chain": Utf8View,
"ignored": Int64)) NO
+
+query I?
+SELECT row_id, messages FROM large_list_messages ORDER BY row_id;
+----
+1 [{id: 10, name: alpha, chain: NULL, ignored: NULL}, {id: 20, name: beta,
chain: NULL, ignored: NULL}]
+2 [{id: 30, name: gamma, chain: eth, ignored: 99}]
+
+query IIT rowsort
+SELECT
+ row_id,
+ get_field(messages[1], 'id') AS msg_id,
+ get_field(messages[1], 'chain') AS chain
+FROM large_list_messages;
+----
+1 10 NULL
+2 30 eth
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]