pnowojski commented on code in PR #27068: URL: https://github.com/apache/flink/pull/27068#discussion_r2414226019
########## flink-table/flink-table-runtime/src/test/java/org/apache/flink/table/runtime/operators/sink/SinkUpsertMaterializerMigrationTest.java: ########## @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.runtime.operators.sink; + +import org.apache.flink.FlinkVersion; +import org.apache.flink.streaming.api.operators.OneInputStreamOperator; +import org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness; +import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness; +import org.apache.flink.streaming.util.OperatorSnapshotUtil; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.runtime.typeutils.RowDataSerializer; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.test.util.MigrationTest; +import org.apache.flink.types.RowKind; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import static org.apache.flink.FlinkVersion.current; +import static org.apache.flink.streaming.util.OperatorSnapshotUtil.getResourceFilename; +import static org.apache.flink.table.runtime.operators.sink.SinkUpsertMaterializerTest.ASSERTOR; +import static org.apache.flink.table.runtime.operators.sink.SinkUpsertMaterializerTest.EQUALISER; +import static org.apache.flink.table.runtime.operators.sink.SinkUpsertMaterializerTest.LOGICAL_TYPES; +import static org.apache.flink.table.runtime.operators.sink.SinkUpsertMaterializerTest.TTL_CONFIG; +import static org.apache.flink.table.runtime.operators.sink.SinkUpsertMaterializerTest.UPSERT_KEY; +import static org.apache.flink.table.runtime.operators.sink.SinkUpsertMaterializerTest.UPSERT_KEY_EQUALISER; +import static org.apache.flink.table.runtime.util.StreamRecordUtils.deleteRecord; +import static org.apache.flink.table.runtime.util.StreamRecordUtils.insertRecord; +import static org.apache.flink.table.runtime.util.StreamRecordUtils.rowOfKind; +import static org.apache.flink.table.runtime.util.StreamRecordUtils.updateAfterRecord; + +/** Test for {@link SinkUpsertMaterializer} migration. */ +@RunWith(Parameterized.class) +public class SinkUpsertMaterializerMigrationTest implements MigrationTest { + + private static final String FOLDER_NAME = "sink-upsert-materializer"; + + @Parameterized.Parameter(0) + @SuppressWarnings({"ClassEscapesDefinedScope", "DefaultAnnotationParam"}) + public SinkOperationMode migrateFrom; + + @Parameterized.Parameter(1) + @SuppressWarnings("ClassEscapesDefinedScope") + public SinkOperationMode migrateTo; + + @Parameterized.Parameters(name = "{0} -> {1}") + public static List<Object[]> parameters() { + List<Object[]> result = new ArrayList<>(); + Set<FlinkVersion> versions = FlinkVersion.rangeOf(FlinkVersion.v2_2, FlinkVersion.v2_2); + for (FlinkVersion fromVersion : versions) { + for (SinkUpsertMaterializerStateBackend backend : + SinkUpsertMaterializerStateBackend.values()) { + result.add( + new Object[] { + new SinkOperationMode(fromVersion, backend), + new SinkOperationMode(current(), backend) + }); + } + } + return result; + } + + @Test + public void testMigration() throws Exception { + String path = getResourceFilename(FOLDER_NAME + "/" + getFileName(migrateFrom)); + try (OneInputStreamOperatorTestHarness<RowData, RowData> harness = + createHarness(migrateTo, path)) { + testCorrectnessAfterSnapshot(harness); + } + } + + private OneInputStreamOperatorTestHarness<RowData, RowData> createHarness( + SinkOperationMode mode, String snapshotPath) throws Exception { + int[] inputUpsertKey = {UPSERT_KEY}; + OneInputStreamOperator<RowData, RowData> materializer = + SinkUpsertMaterializer.create( + TTL_CONFIG, + RowType.of(LOGICAL_TYPES), + EQUALISER, + UPSERT_KEY_EQUALISER, + inputUpsertKey); + KeyedOneInputStreamOperatorTestHarness<RowData, RowData, RowData> harness = + SinkUpsertMaterializerTest.createHarness( + materializer, mode.stateBackend, LOGICAL_TYPES); + harness.setup(new RowDataSerializer(LOGICAL_TYPES)); + if (snapshotPath != null) { + harness.initializeState(snapshotPath); + } + harness.open(); + harness.setStateTtlProcessingTime(1); + return harness; + } + + private void testCorrectnessBeforeSnapshot( + OneInputStreamOperatorTestHarness<RowData, RowData> testHarness) throws Exception { + + testHarness.processElement(insertRecord(1L, 1, "a1")); + ASSERTOR.shouldEmit(testHarness, rowOfKind(RowKind.INSERT, 1L, 1, "a1")); + + testHarness.processElement(updateAfterRecord(1L, 1, "a11")); + ASSERTOR.shouldEmit(testHarness, rowOfKind(RowKind.UPDATE_AFTER, 1L, 1, "a11")); + + testHarness.processElement(insertRecord(3L, 1, "a3")); + ASSERTOR.shouldEmit(testHarness, rowOfKind(RowKind.UPDATE_AFTER, 3L, 1, "a3")); + } + + private void testCorrectnessAfterSnapshot( + OneInputStreamOperatorTestHarness<RowData, RowData> testHarness) throws Exception { + testHarness.processElement(deleteRecord(1L, 1, "a111")); + ASSERTOR.shouldEmitNothing(testHarness); + + testHarness.processElement(deleteRecord(3L, 1, "a33")); + ASSERTOR.shouldEmit(testHarness, rowOfKind(RowKind.DELETE, 3L, 1, "a33")); + + testHarness.processElement(insertRecord(4L, 1, "a4")); + ASSERTOR.shouldEmit(testHarness, rowOfKind(RowKind.INSERT, 4L, 1, "a4")); + + testHarness.setStateTtlProcessingTime(1002); + testHarness.processElement(deleteRecord(4L, 1, "a4")); + ASSERTOR.shouldEmitNothing(testHarness); + } Review Comment: either now, or later when you add the version based on `LinkedMultiSetState`, I would add a bit more migration tests. - combinations (legacy -> linked, legacy -> value, linked -> value, value -> linked) - with list of 3 elements for one key - combinations of removal: - first remove head element - first remove middle element - first remove tail element - maybe we could abstract the steps, and then implement a generic test that would trigger snapshot and migration at various stages, instead of one fixed place (As you have here "before snapshot" and "after snapshot")? ########## flink-table/flink-table-runtime/src/test/java/org/apache/flink/table/runtime/operators/sink/SinkUpsertMaterializerTest.java: ########## @@ -70,83 +103,236 @@ public RecordEqualiser newInstance(ClassLoader classLoader) { } }; + /** + * If the composite serializer in {@link SinkUpsertMaterializer} works on projected fields then + * it might use the wrong serializer, e.g. the {@link VarCharType} instead of the {@link + * IntType}. That might cause {@link ArrayIndexOutOfBoundsException} because string serializer + * expects the first number to be the length of the string. + */ @Test - void test() throws Exception { - SinkUpsertMaterializer materializer = - new SinkUpsertMaterializer( - ttlConfig, serializer, equaliser, upsertKeyEqualiser, null); + public void testUpsertKeySerializerFailure() throws Exception { + LogicalType[] types = new LogicalType[] {new VarCharType(), new IntType()}; + // project int field, while in the original record it's string + + OneInputStreamOperator<RowData, RowData> materializer = createOperator(types, 1); + try (KeyedOneInputStreamOperatorTestHarness<RowData, RowData, RowData> testHarness = + createHarness(materializer, stateBackend, types)) { + testHarness.open(); + RowDataHarnessAssertor assertor = new RowDataHarnessAssertor(types); + // -1 is not a valid string length + testHarness.processElement(binaryRecord(RowKind.INSERT, "any string", -1)); + assertor.shouldEmit(testHarness, rowOfKind(RowKind.INSERT, "any string", -1)); + + // 999 as a string length is too long + testHarness.processElement(binaryRecord(RowKind.INSERT, "any string", 999)); + assertor.shouldEmit(testHarness, rowOfKind(RowKind.INSERT, "any string", 999)); + } + } + + @Test + public void testUpsertKeySerializerSilentCorruption() throws Exception { + LogicalType[] types = + new LogicalType[] {new VarCharType(), new BigIntType(), new IntType()}; + // project int field, while in the original record it's string + + OneInputStreamOperator<RowData, RowData> materializer = createOperator(types, 1); + try (KeyedOneInputStreamOperatorTestHarness<RowData, RowData, RowData> testHarness = + createHarness(materializer, stateBackend, types)) { + testHarness.open(); + RowDataHarnessAssertor assertor = new RowDataHarnessAssertor(types); + + // this might serialize upsert key as 32-character string potentially including "97" + testHarness.processElement(binaryRecord(RowKind.INSERT, "any string", 32L, 97)); + assertor.shouldEmit(testHarness, rowOfKind(RowKind.INSERT, "any string", 32L, 97)); + + // but here it might include "98" which would result in no output and test failure + testHarness.processElement(binaryRecord(RowKind.DELETE, "any string", 32L, 98)); + assertor.shouldEmit(testHarness, rowOfKind(RowKind.DELETE, "any string", 32L, 98)); + } + } + + @Test + public void testUpsertEqualizer() throws Exception { + LogicalType[] types = new LogicalType[] {new IntType(), new BigIntType()}; + + OneInputStreamOperator<RowData, RowData> materializer = createOperator(types, 1); + try (KeyedOneInputStreamOperatorTestHarness<RowData, RowData, RowData> testHarness = + createHarness(materializer, stateBackend, types)) { + testHarness.open(); + RowDataHarnessAssertor assertor = new RowDataHarnessAssertor(types); + + // upsert key is 33, 0 is unused + testHarness.processElement(binaryRecord(RowKind.INSERT, 0, 33L)); + assertor.shouldEmit(testHarness, rowOfKind(RowKind.INSERT, 0, 33L)); + + // upsert key 33 - should remove AND clear the state involving upsert equalizer + // equalizer might fail if it's used on un-projected records + testHarness.processElement(binaryRecord(RowKind.DELETE, 1, 33L)); + assertor.shouldEmit(testHarness, rowOfKind(RowKind.DELETE, 1, 33L)); + } + } Review Comment: Hmm? This doesn't sound like a refactor. Ditto about some other changes in this commit & file > [FLINK-38460] Refactor SinkUpsertMaterializer tests did you mix up something? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
