yshcz opened a new issue, #1973:
URL: https://github.com/apache/iceberg-rust/issues/1973

   ### Apache Iceberg Rust version
   
   None
   
   ### Describe the bug
   
   The spec requires manifests written in format version 2 or later to include 
the `content` field in the Avro file key-value metadata.
   
   Currently the writer only writes the content metadata for V2 manifests. V3 
manifests are missing this required field. This causes a roundtrip problem 
where V3 delete manifests written by iceberg-rust are read back as data 
manifests.
   
   The fix is probably just to update the condition that writes the content 
metadata field so that it also covers V3.
   
   ### To Reproduce
   
   Add the following test to `crates/iceberg/src/spec/manifest/mod.rs`
   
   ```rust
   /// Reproduces the V3 delete-manifest roundtrip problem.
   ///
   /// Writes a V3 delete manifest holding a single position-delete entry,
   /// parses the resulting Avro file back, and demonstrates that the parsed
   /// metadata reports `ManifestContentType::Data` even though the writer
   /// returned a `ManifestFile` whose content is `ManifestContentType::Deletes`.
   #[tokio::test]
   async fn test_v3_delete_manifest_delte_file_roundtrip() {
       let schema = Arc::new(
           Schema::builder()
               .with_fields(vec![
                   Arc::new(NestedField::optional(
                       1,
                       "id",
                       Type::Primitive(PrimitiveType::Long),
                   )),
                   Arc::new(NestedField::optional(
                       2,
                       "data",
                       Type::Primitive(PrimitiveType::String),
                   )),
               ])
               .build()
               .unwrap(),
       );
   
       let partition_spec = PartitionSpec::builder(schema.clone())
           .with_spec_id(0)
           .build()
           .unwrap();
   
       // Create a position delete file entry
       let delete_entry = ManifestEntry {
           status: ManifestStatus::Added,
           snapshot_id: None,
           sequence_number: None,
           file_sequence_number: None,
           data_file: DataFile {
               content: DataContentType::PositionDeletes,
               file_path: "s3://bucket/table/data/delete-00000.parquet".to_string(),
               file_format: DataFileFormat::Parquet,
               partition: Struct::empty(),
               record_count: 10,
               file_size_in_bytes: 1024,
               column_sizes: HashMap::new(),
               value_counts: HashMap::new(),
               null_value_counts: HashMap::new(),
               nan_value_counts: HashMap::new(),
               lower_bounds: HashMap::new(),
               upper_bounds: HashMap::new(),
               key_metadata: None,
               split_offsets: None,
               equality_ids: None,
               sort_order_id: None,
               partition_spec_id: 0,
               first_row_id: None,
               referenced_data_file: None,
               content_offset: None,
               content_size_in_bytes: None,
           },
       };
   
       // Write a V3 delete manifest
       let tmp_dir = TempDir::new().unwrap();
       let path = tmp_dir.path().join("v3_delete_manifest.avro");
       let io = FileIOBuilder::new_fs_io().build().unwrap();
       let output_file = io.new_output(path.to_str().unwrap()).unwrap();
   
       let mut writer = ManifestWriterBuilder::new(
           output_file,
           Some(1),
           None,
           schema.clone(),
           partition_spec.clone(),
       )
       .build_v3_deletes();
   
       writer.add_entry(delete_entry).unwrap();
       let manifest_file = writer.write_manifest_file().await.unwrap();
   
       // The returned ManifestFile correctly reports Deletes content
       assert_eq!(manifest_file.content, ManifestContentType::Deletes);
   
       // Read back the manifest file. The `expect` message is kept on a single
       // line so the string literal does not pick up an embedded newline.
       let actual_manifest =
           Manifest::parse_avro(fs::read(&path).expect("read_file must succeed").as_slice())
               .unwrap();
   
       // The content type reads as Data due to the bug.
       assert_eq!(
           actual_manifest.metadata().content,
           ManifestContentType::Data,
       );
   
       // Expected:
       // assert_eq!(
       //     actual_manifest.metadata().content,
       //     ManifestContentType::Deletes,
       // );
   }
   ```
   
   ### Expected behavior
   
   _No response_
   
   ### Willingness to contribute
   
   None


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to