stevenzwu commented on code in PR #7171:
URL: https://github.com/apache/iceberg/pull/7171#discussion_r1195914753


##########
flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergFilesCommitter.java:
##########
@@ -877,6 +887,130 @@ public void testCommitTwoCheckpointsInSingleTxn() throws Exception {
     }
   }
 
+  @Test
+  public void testSpecEvolution() throws Exception {
+    long timestamp = 0;
+    int checkpointId = 0;
+    List<RowData> rows = Lists.newArrayList();
+    JobID jobId = new JobID();
+
+    OperatorID operatorId;
+    OperatorSubtaskState snapshot;
+    DataFile dataFile;
+    int specId;
+
+    try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
+      harness.setup();
+      harness.open();
+      operatorId = harness.getOperator().getOperatorID();
+
+      assertSnapshotSize(0);
+
+      checkpointId++;
+      RowData rowData = SimpleDataUtil.createRowData(checkpointId, "hello" + checkpointId);
+      // table unpartitioned
+      dataFile = writeDataFile("data-" + checkpointId, ImmutableList.of(rowData));
+      harness.processElement(of(dataFile), ++timestamp);
+      rows.add(rowData);
+      harness.snapshot(checkpointId, ++timestamp);
+
+      specId =
+          getStagingManifestSpecId(harness.getOperator().getOperatorStateBackend(), checkpointId);
+      Assert.assertEquals(

Review Comment:
   nit: use AssertJ instead: https://iceberg.apache.org/contribute/#assertj
   
   Then we can use a format string for the error message via `as`:
   ```
     public SELF as(String description, Object... args) {
       return super.as(description, args);
     }
   ```
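   
   For example (just a sketch, reusing the `specId` and `table` variables from the hunk above and assuming a static import of `org.assertj.core.api.Assertions.assertThat`), the assertion could become:
   
   ```java
   assertThat(specId)
       .as(
           "Expected the partition spec ID of the staging manifest to be %s, but it was %s",
           table.spec().specId(), specId)
       .isEqualTo(table.spec().specId());
   ```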
   
   



##########
flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergFilesCommitter.java:
##########
@@ -877,6 +887,130 @@ public void testCommitTwoCheckpointsInSingleTxn() throws Exception {
     }
   }
 
+  @Test
+  public void testSpecEvolution() throws Exception {
+    long timestamp = 0;
+    int checkpointId = 0;
+    List<RowData> rows = Lists.newArrayList();
+    JobID jobId = new JobID();
+
+    OperatorID operatorId;
+    OperatorSubtaskState snapshot;
+    DataFile dataFile;
+    int specId;
+
+    try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
+      harness.setup();
+      harness.open();
+      operatorId = harness.getOperator().getOperatorID();
+
+      assertSnapshotSize(0);
+
+      checkpointId++;
+      RowData rowData = SimpleDataUtil.createRowData(checkpointId, "hello" + checkpointId);
+      // table unpartitioned
+      dataFile = writeDataFile("data-" + checkpointId, ImmutableList.of(rowData));
+      harness.processElement(of(dataFile), ++timestamp);
+      rows.add(rowData);
+      harness.snapshot(checkpointId, ++timestamp);
+
+      specId =
+          getStagingManifestSpecId(harness.getOperator().getOperatorStateBackend(), checkpointId);
+      Assert.assertEquals(
+          String.format(
+              "Expected the partition spec ID of staging manifest is %s, but the ID is: %s",
+              table.spec().specId(), specId),
+          table.spec().specId(),
+          specId);
+
+      harness.notifyOfCompletedCheckpoint(checkpointId);
+
+      // Change partition spec
+      table.refresh();
+      PartitionSpec oldSpec = table.spec();
+      table.updateSpec().addField("id").commit();
+
+      checkpointId++;
+      rowData = SimpleDataUtil.createRowData(checkpointId, "hello" + checkpointId);
+      // write data with old partition spec
+      dataFile = writeDataFile("data-" + checkpointId, ImmutableList.of(rowData), oldSpec, null);
+      harness.processElement(of(dataFile), ++timestamp);
+      rows.add(rowData);
+      snapshot = harness.snapshot(checkpointId, ++timestamp);
+
+      specId =
+          getStagingManifestSpecId(harness.getOperator().getOperatorStateBackend(), checkpointId);
+      Assert.assertEquals(
+          String.format(
+              "Expected the partition spec ID of staging manifest is %s, but the ID is: %s",
+              oldSpec.specId(), specId),
+          oldSpec.specId(),
+          specId);
+
+      harness.notifyOfCompletedCheckpoint(checkpointId);
+
+      assertFlinkManifests(0);
+
+      SimpleDataUtil.assertTableRows(table, ImmutableList.copyOf(rows), branch);
+      assertSnapshotSize(checkpointId);
+      assertMaxCommittedCheckpointId(jobId, operatorId, checkpointId);
+      Assert.assertEquals(
+          TestIcebergFilesCommitter.class.getName(),
+          SimpleDataUtil.latestSnapshot(table, branch).summary().get("flink.test"));
+    }
+
+    // Restore from the given snapshot
+    try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
+      harness.getStreamConfig().setOperatorID(operatorId);
+      harness.setup();
+      harness.initializeState(snapshot);
+      harness.open();
+
+      SimpleDataUtil.assertTableRows(table, rows, branch);
+      assertSnapshotSize(checkpointId);
+      assertMaxCommittedCheckpointId(jobId, operatorId, checkpointId);
+
+      checkpointId++;
+      RowData row = SimpleDataUtil.createRowData(checkpointId, "world" + checkpointId);
+      StructLike partition = new PartitionData(table.spec().partitionType());
+      partition.set(0, checkpointId);
+      dataFile =
+          writeDataFile("data-" + checkpointId, ImmutableList.of(row), table.spec(), partition);
+      harness.processElement(of(dataFile), ++timestamp);
+      rows.add(row);
+      harness.snapshot(checkpointId, ++timestamp);
+      assertFlinkManifests(1);
+
+      specId =
+          getStagingManifestSpecId(harness.getOperator().getOperatorStateBackend(), checkpointId);
+      Assert.assertEquals(

Review Comment:
   nit: same comment for assertj style



##########
flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergFilesCommitter.java:
##########
@@ -877,6 +887,130 @@ public void testCommitTwoCheckpointsInSingleTxn() throws Exception {
     }
   }
 
+  @Test
+  public void testSpecEvolution() throws Exception {
+    long timestamp = 0;
+    int checkpointId = 0;
+    List<RowData> rows = Lists.newArrayList();
+    JobID jobId = new JobID();
+
+    OperatorID operatorId;
+    OperatorSubtaskState snapshot;
+    DataFile dataFile;
+    int specId;
+
+    try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
+      harness.setup();
+      harness.open();
+      operatorId = harness.getOperator().getOperatorID();
+
+      assertSnapshotSize(0);
+
+      checkpointId++;
+      RowData rowData = SimpleDataUtil.createRowData(checkpointId, "hello" + checkpointId);
+      // table unpartitioned
+      dataFile = writeDataFile("data-" + checkpointId, ImmutableList.of(rowData));
+      harness.processElement(of(dataFile), ++timestamp);
+      rows.add(rowData);
+      harness.snapshot(checkpointId, ++timestamp);
+
+      specId =
+          getStagingManifestSpecId(harness.getOperator().getOperatorStateBackend(), checkpointId);
+      Assert.assertEquals(
+          String.format(
+              "Expected the partition spec ID of staging manifest is %s, but the ID is: %s",
+              table.spec().specId(), specId),
+          table.spec().specId(),
+          specId);
+
+      harness.notifyOfCompletedCheckpoint(checkpointId);
+
+      // Change partition spec
+      table.refresh();
+      PartitionSpec oldSpec = table.spec();
+      table.updateSpec().addField("id").commit();
+
+      checkpointId++;
+      rowData = SimpleDataUtil.createRowData(checkpointId, "hello" + checkpointId);
+      // write data with old partition spec
+      dataFile = writeDataFile("data-" + checkpointId, ImmutableList.of(rowData), oldSpec, null);
+      harness.processElement(of(dataFile), ++timestamp);
+      rows.add(rowData);
+      snapshot = harness.snapshot(checkpointId, ++timestamp);
+
+      specId =
+          getStagingManifestSpecId(harness.getOperator().getOperatorStateBackend(), checkpointId);
+      Assert.assertEquals(
+          String.format(
+              "Expected the partition spec ID of staging manifest is %s, but the ID is: %s",
+              oldSpec.specId(), specId),
+          oldSpec.specId(),
+          specId);
+
+      harness.notifyOfCompletedCheckpoint(checkpointId);
+
+      assertFlinkManifests(0);
+
+      SimpleDataUtil.assertTableRows(table, ImmutableList.copyOf(rows), branch);
+      assertSnapshotSize(checkpointId);
+      assertMaxCommittedCheckpointId(jobId, operatorId, checkpointId);
+      Assert.assertEquals(

Review Comment:
   nit: not sure if this assertion is needed in this test



##########
flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergFilesCommitter.java:
##########
@@ -877,6 +887,130 @@ public void testCommitTwoCheckpointsInSingleTxn() throws Exception {
     }
   }
 
+  @Test
+  public void testSpecEvolution() throws Exception {
+    long timestamp = 0;
+    int checkpointId = 0;
+    List<RowData> rows = Lists.newArrayList();
+    JobID jobId = new JobID();
+
+    OperatorID operatorId;
+    OperatorSubtaskState snapshot;
+    DataFile dataFile;
+    int specId;
+
+    try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
+      harness.setup();
+      harness.open();
+      operatorId = harness.getOperator().getOperatorID();
+
+      assertSnapshotSize(0);
+
+      checkpointId++;
+      RowData rowData = SimpleDataUtil.createRowData(checkpointId, "hello" + checkpointId);
+      // table unpartitioned
+      dataFile = writeDataFile("data-" + checkpointId, ImmutableList.of(rowData));
+      harness.processElement(of(dataFile), ++timestamp);
+      rows.add(rowData);
+      harness.snapshot(checkpointId, ++timestamp);
+
+      specId =
+          getStagingManifestSpecId(harness.getOperator().getOperatorStateBackend(), checkpointId);
+      Assert.assertEquals(

Review Comment:
   the error msg might be unnecessary with the assertj style
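   
   For instance, a plain assertion (again just a sketch, assuming a static import of `Assertions.assertThat`) already reports both the expected and actual spec IDs when it fails:
   
   ```java
   assertThat(specId).isEqualTo(table.spec().specId());
   ```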



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

