sdd commented on code in PR #383:
URL: https://github.com/apache/iceberg-rust/pull/383#discussion_r1630700456
##########
crates/iceberg/src/writer/base_writer/data_file_writer.rs:
##########
@@ -123,195 +126,35 @@ mod test {
location_generator::{test::MockLocationGenerator,
DefaultFileNameGenerator},
ParquetWriterBuilder,
},
- tests::check_parquet_data_file,
IcebergWriter, IcebergWriterBuilder,
},
};
#[tokio::test]
- async fn test_data_file_writer() -> Result<(), anyhow::Error> {
+ async fn test_parquet_writer() -> Result<()> {
let temp_dir = TempDir::new().unwrap();
let file_io = FileIOBuilder::new_fs_io().build().unwrap();
- let location_gen =
+ let loccation_gen =
MockLocationGenerator::new(temp_dir.path().to_str().unwrap().to_string());
let file_name_gen =
DefaultFileNameGenerator::new("test".to_string(), None,
DataFileFormat::Parquet);
- // prepare data
- // Int, Struct(Int), String, List(Int), Struct(Struct(Int))
- let schema = {
- let fields = vec![
- arrow_schema::Field::new("col0",
arrow_schema::DataType::Int64, true)
- .with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "0".to_string(),
- )])),
- arrow_schema::Field::new(
- "col1",
- arrow_schema::DataType::Struct(
- vec![arrow_schema::Field::new(
- "sub_col",
- arrow_schema::DataType::Int64,
- true,
- )
- .with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "5".to_string(),
- )]))]
- .into(),
- ),
- true,
- )
- .with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "1".to_string(),
- )])),
- arrow_schema::Field::new("col2", arrow_schema::DataType::Utf8,
true).with_metadata(
- HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(),
"2".to_string())]),
- ),
- arrow_schema::Field::new(
- "col3",
- arrow_schema::DataType::List(Arc::new(
- arrow_schema::Field::new("item",
arrow_schema::DataType::Int64, true)
- .with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "6".to_string(),
- )])),
- )),
- true,
- )
- .with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "3".to_string(),
- )])),
- arrow_schema::Field::new(
- "col4",
- arrow_schema::DataType::Struct(
- vec![arrow_schema::Field::new(
- "sub_col",
- arrow_schema::DataType::Struct(
- vec![arrow_schema::Field::new(
- "sub_sub_col",
- arrow_schema::DataType::Int64,
- true,
- )
- .with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "7".to_string(),
- )]))]
- .into(),
- ),
- true,
- )
- .with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "8".to_string(),
- )]))]
- .into(),
- ),
- true,
- )
- .with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "4".to_string(),
- )])),
- ];
- Arc::new(arrow_schema::Schema::new(fields))
- };
- let col0 = Arc::new(Int64Array::from_iter_values(vec![1; 1024])) as
ArrayRef;
- let col1 = Arc::new(StructArray::new(
- vec![
- arrow_schema::Field::new("sub_col",
arrow_schema::DataType::Int64, true)
- .with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "5".to_string(),
- )])),
- ]
- .into(),
- vec![Arc::new(Int64Array::from_iter_values(vec![1; 1024]))],
- None,
- ));
- let col2 = Arc::new(arrow_array::StringArray::from_iter_values(vec![
- "test";
- 1024
- ])) as ArrayRef;
- let col3 = Arc::new({
- let list_parts =
arrow_array::ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
- Some(
- vec![Some(1),]
- );
- 1024
- ])
- .into_parts();
- arrow_array::ListArray::new(
-
Arc::new(list_parts.0.as_ref().clone().with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "6".to_string(),
- )]))),
- list_parts.1,
- list_parts.2,
- list_parts.3,
- )
- }) as ArrayRef;
- let col4 = Arc::new(StructArray::new(
- vec![arrow_schema::Field::new(
- "sub_col",
- arrow_schema::DataType::Struct(
- vec![arrow_schema::Field::new(
- "sub_sub_col",
- arrow_schema::DataType::Int64,
- true,
- )
- .with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "7".to_string(),
- )]))]
- .into(),
- ),
- true,
- )
- .with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "8".to_string(),
- )]))]
- .into(),
- vec![Arc::new(StructArray::new(
- vec![
- arrow_schema::Field::new("sub_sub_col",
arrow_schema::DataType::Int64, true)
- .with_metadata(HashMap::from([(
- PARQUET_FIELD_ID_META_KEY.to_string(),
- "7".to_string(),
- )])),
- ]
- .into(),
- vec![Arc::new(Int64Array::from_iter_values(vec![1; 1024]))],
- None,
- ))],
- None,
- ));
- let to_write =
- RecordBatch::try_new(schema.clone(), vec![col0, col1, col2, col3,
col4]).unwrap();
-
- // prepare writer
- let pb = ParquetWriterBuilder::new(
+ let pw = ParquetWriterBuilder::new(
WriterProperties::builder().build(),
- to_write.schema(),
+ Arc::new(Schema::builder().build().unwrap()),
file_io.clone(),
- location_gen,
+ loccation_gen,
Review Comment:
Typo: `loccation_gen` is misspelled (double `c`); it should be `location_gen`, both here and where the variable is declared above.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]