viirya commented on code in PR #2545:
URL: https://github.com/apache/iceberg-rust/pull/2545#discussion_r3334883238
##########
crates/iceberg/src/transaction/append.rs:
##########
@@ -147,14 +152,192 @@ impl SnapshotProduceOperation for FastAppendOperation {
#[cfg(test)]
mod tests {
use std::collections::HashMap;
+ use std::fs;
use std::sync::Arc;
+ use minijinja::{AutoEscape, Environment, Value, context};
+ use tempfile::TempDir;
+ use uuid::Uuid;
+
+ use super::FastAppendOperation;
+ use crate::io::FileIO;
use crate::spec::{
- DataContentType, DataFileBuilder, DataFileFormat, Literal,
MAIN_BRANCH, Struct,
+ DataContentType, DataFileBuilder, DataFileFormat, Literal,
MAIN_BRANCH, ManifestEntry,
+ ManifestListWriter, ManifestStatus, ManifestWriterBuilder, Struct,
TableMetadata,
};
+ use crate::table::Table;
+ use crate::test_utils::test_runtime;
+ use crate::transaction::snapshot::{SnapshotProduceOperation,
SnapshotProducer};
use crate::transaction::tests::make_v2_minimal_table;
use crate::transaction::{Transaction, TransactionAction};
- use crate::{TableRequirement, TableUpdate};
+ use crate::{TableIdent, TableRequirement, TableUpdate};
+
+ fn render_template(template: &str, ctx: Value) -> String {
+ let mut env = Environment::new();
+ env.set_auto_escape_callback(|_| AutoEscape::None);
+ env.render_str(template, ctx).unwrap()
+ }
+
+ /// Builds a table whose current snapshot's manifest list contains a data
manifest
+ /// followed by a delete-only manifest (one entry with
`ManifestStatus::Deleted`,
+ /// so `deleted_files_count > 0` while `added_files_count ==
existing_files_count == 0`).
+ ///
+ /// Returns the table plus the `manifest_path` of the delete-only manifest
so callers
+ /// can assert whether `existing_manifest()` carries it forward.
+ async fn make_table_with_delete_only_manifest() -> (Table, TempDir,
String) {
+ let tmp_dir = TempDir::new().unwrap();
+ let table_location = tmp_dir.path().join("table1");
+ let manifest_list_location =
table_location.join("metadata/manifests_list_1.avro");
+ let table_metadata_location = table_location.join("metadata/v1.json");
+
+ let file_io = FileIO::new_with_fs();
+
+ let template = fs::read_to_string(format!(
+ "{}/testdata/example_table_metadata_v2.json",
+ env!("CARGO_MANIFEST_DIR")
+ ))
+ .unwrap();
+ // The template has two snapshots; point the current one at our
manifest list.
+ let metadata_json = render_template(&template, context! {
+ table_location => &table_location,
+ manifest_list_1_location => &manifest_list_location,
+ manifest_list_2_location => &manifest_list_location,
+ table_metadata_1_location => &table_metadata_location,
+ });
+ let table_metadata =
serde_json::from_str::<TableMetadata>(&metadata_json).unwrap();
+
+ let table = Table::builder()
+ .metadata(table_metadata)
+ .identifier(TableIdent::from_strs(["db", "table1"]).unwrap())
+ .file_io(file_io)
+ .metadata_location(table_metadata_location.to_str().unwrap())
+ .runtime(test_runtime())
+ .build()
+ .unwrap();
+
+ let current_snapshot = table.metadata().current_snapshot().unwrap();
+ let schema = current_snapshot.schema(table.metadata()).unwrap();
+ let partition_spec = table.metadata().default_partition_spec();
+
+ let next_manifest_file = |location: &str| {
+ table
+ .file_io()
+ .new_output(format!(
+ "{}/metadata/manifest_{}.avro",
+ location,
+ Uuid::new_v4()
+ ))
+ .unwrap()
+ };
+ let table_location_str = table_location.to_str().unwrap().to_string();
+
+ // Data manifest: one Added data file.
+ let mut data_writer = ManifestWriterBuilder::new(
+ next_manifest_file(&table_location_str),
+ Some(current_snapshot.snapshot_id()),
+ None,
+ schema.clone(),
+ partition_spec.as_ref().clone(),
+ )
+ .build_v2_data();
+ data_writer
+ .add_entry(
+ ManifestEntry::builder()
+ .status(ManifestStatus::Added)
+ .data_file(
+ DataFileBuilder::default()
+ .partition_spec_id(0)
+ .content(DataContentType::Data)
+
.file_path(format!("{table_location_str}/data.parquet"))
+ .file_format(DataFileFormat::Parquet)
+ .file_size_in_bytes(100)
+ .record_count(1)
+
.partition(Struct::from_iter([Some(Literal::long(100))]))
+ .build()
+ .unwrap(),
+ )
+ .build(),
+ )
+ .unwrap();
+ let data_manifest = data_writer.write_manifest_file().await.unwrap();
+
+ // Delete-only manifest: a single Deleted entry, nothing added or
existing.
+ let mut delete_writer = ManifestWriterBuilder::new(
+ next_manifest_file(&table_location_str),
+ Some(current_snapshot.snapshot_id()),
+ None,
+ schema.clone(),
+ partition_spec.as_ref().clone(),
+ )
+ .build_v2_data();
+ delete_writer
+ .add_delete_entry(
+ ManifestEntry::builder()
+ .status(ManifestStatus::Deleted)
+ .sequence_number(0)
+ .file_sequence_number(0)
+ .data_file(
+ DataFileBuilder::default()
+ .partition_spec_id(0)
+ .content(DataContentType::Data)
+
.file_path(format!("{table_location_str}/removed.parquet"))
+ .file_format(DataFileFormat::Parquet)
+ .file_size_in_bytes(100)
+ .record_count(1)
+
.partition(Struct::from_iter([Some(Literal::long(100))]))
+ .build()
+ .unwrap(),
+ )
+ .build(),
+ )
+ .unwrap();
+ let delete_manifest =
delete_writer.write_manifest_file().await.unwrap();
+ let delete_manifest_path = delete_manifest.manifest_path.clone();
+
+ // Sanity: the delete manifest really is delete-only.
+ assert!(delete_manifest.has_deleted_files());
+ assert!(!delete_manifest.has_added_files());
+ assert!(!delete_manifest.has_existing_files());
+
+ let mut manifest_list_write = ManifestListWriter::v2(
+ table
+ .file_io()
+ .new_output(current_snapshot.manifest_list())
+ .unwrap(),
+ current_snapshot.snapshot_id(),
+ current_snapshot.parent_snapshot_id(),
+ current_snapshot.sequence_number(),
+ );
+ manifest_list_write
+ .add_manifests(vec![data_manifest, delete_manifest].into_iter())
+ .unwrap();
+ manifest_list_write.close().await.unwrap();
+
+ (table, tmp_dir, delete_manifest_path)
+ }
+
+ /// Regression test for #2148: `FastAppendOperation::existing_manifest()`
must carry
+ /// delete-only manifests forward into the new snapshot. Dropping them
lets the files
+ /// they mark as removed reappear as live data on the next append.
+ #[tokio::test]
+ async fn test_existing_manifest_preserves_delete_only_manifest() {
+ let (table, _tmp_dir, delete_manifest_path) =
make_table_with_delete_only_manifest().await;
+
+ let producer = SnapshotProducer::new(&table, Uuid::now_v7(), None,
HashMap::new(), vec![]);
Review Comment:
Good call — switched the test to drive the actual `fast_append().commit()`
path and assert the delete-only manifest survives in the new snapshot's
manifest list (following the `test_fast_append` pattern), instead of calling
`existing_manifest()` directly.
One note: the test setup still uses the manifest writers to seed a
delete-only manifest in the pre-existing snapshot, since there's no
higher-level API to produce one yet (which is also why this bug is currently
latent). Only the action under test now goes through the `FastAppend` API.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]