StefanRRichter commented on a change in pull request #6875: [FLINK-9808] [state backends] Migrate state when necessary in state backends URL: https://github.com/apache/flink/pull/6875#discussion_r227722870
########## File path: flink-runtime/src/test/java/org/apache/flink/runtime/state/StateBackendMigrationTestBase.java ########## @@ -0,0 +1,780 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.runtime.state; + +import org.apache.flink.api.common.JobID; +import org.apache.flink.api.common.state.ValueState; +import org.apache.flink.api.common.state.ValueStateDescriptor; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.api.common.typeutils.TypeSerializerSchemaCompatibility; +import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot; +import org.apache.flink.api.common.typeutils.base.IntSerializer; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.core.memory.DataOutputView; +import org.apache.flink.runtime.checkpoint.CheckpointOptions; +import org.apache.flink.runtime.checkpoint.StateObjectCollection; +import org.apache.flink.runtime.execution.Environment; +import org.apache.flink.runtime.operators.testutils.DummyEnvironment; +import org.apache.flink.types.StringValue; +import org.apache.flink.util.TestLogger; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; + +import 
java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.RunnableFuture; + +import static org.junit.Assert.assertEquals; + +/** + * Tests for the {@link KeyedStateBackend} and {@link OperatorStateBackend} as produced + * by various {@link StateBackend}s. + */ +@SuppressWarnings("serial") +public abstract class StateBackendMigrationTestBase<B extends AbstractStateBackend> extends TestLogger { + + @Rule + public final ExpectedException expectedException = ExpectedException.none(); + + // lazily initialized stream storage + private CheckpointStorageLocation checkpointStorageLocation; + + /** + * Different "personalities" of {@link CustomStringSerializer}. Instead of creating + * different classes we parameterize the serializer with this and + * {@link CustomStringSerializerSnapshot} will instantiate serializers with the correct + * personality. + */ + public enum SerializerVersion { + INITIAL, + RESTORE, + NEW + } + + /** + * The compatibility behaviour of {@link CustomStringSerializer}. This controls what + * type of serializer {@link CustomStringSerializerSnapshot} will create for + * the different methods that return/create serializers. + */ + public enum SerializerCompatibilityType { + COMPATIBLE_AS_IS, + REQUIRES_MIGRATION + } + + /** + * The serialization timeliness behaviour of the state backend under test. 
+ */ + public enum BackendSerializationTimeliness { + ON_ACCESS, + ON_CHECKPOINTS + } + + @Test + @SuppressWarnings("unchecked") + public void testValueStateWithSerializerRequiringMigration() throws Exception { + CustomStringSerializer.resetCountingMaps(); + + CheckpointStreamFactory streamFactory = createStreamFactory(); + SharedStateRegistry sharedStateRegistry = new SharedStateRegistry(); + AbstractKeyedStateBackend<Integer> backend = createKeyedBackend(IntSerializer.INSTANCE); + + ValueStateDescriptor<String> kvId = new ValueStateDescriptor<>( + "id", + new CustomStringSerializer(SerializerCompatibilityType.COMPATIBLE_AS_IS, SerializerVersion.INITIAL)); + ValueState<String> state = backend.getPartitionedState(VoidNamespace.INSTANCE, CustomVoidNamespaceSerializer.INSTANCE, kvId); + + // ============ Modifications to the state ============ + // For eager serialization backends: + // This should result in serializer personality INITIAL having 2 serialize calls + // + // For lazy serialization backends: + // This should not result in any serialize / deserialize calls + + backend.setCurrentKey(1); + state.update("1"); + backend.setCurrentKey(2); + state.update("2"); + backend.setCurrentKey(1); + + if (getStateBackendSerializationTimeliness() == BackendSerializationTimeliness.ON_ACCESS) { + assertEquals((Integer) 2, CustomStringSerializer.serializeCalled.get(SerializerVersion.INITIAL)); + assertEquals(null, CustomStringSerializer.deserializeCalled.get(SerializerVersion.INITIAL)); + } else { + assertEquals(null, CustomStringSerializer.serializeCalled.get(SerializerVersion.INITIAL)); + assertEquals(null, CustomStringSerializer.deserializeCalled.get(SerializerVersion.INITIAL)); + } + CustomStringSerializer.resetCountingMaps(); + + // ============ Snapshot #1 ============ + // For eager serialization backends: + // This should not result in any serialize / deserialize calls + // + // For lazy serialization backends: + // This should result in serializer personality 
INITIAL having 2 serialize calls + KeyedStateHandle snapshot1 = runSnapshot( + backend.snapshot(1L, 2L, streamFactory, CheckpointOptions.forCheckpointWithDefaultLocation()), + sharedStateRegistry); + backend.dispose(); + + if (getStateBackendSerializationTimeliness() == BackendSerializationTimeliness.ON_ACCESS) { + assertEquals(null, CustomStringSerializer.serializeCalled.get(SerializerVersion.INITIAL)); + assertEquals(null, CustomStringSerializer.deserializeCalled.get(SerializerVersion.INITIAL)); + } else { + assertEquals((Integer) 2, CustomStringSerializer.serializeCalled.get(SerializerVersion.INITIAL)); + assertEquals(null, CustomStringSerializer.deserializeCalled.get(SerializerVersion.INITIAL)); + } + CustomStringSerializer.resetCountingMaps(); + + // ============ Restore from snapshot #1 ============ + // For eager serialization backends: + // This should not result in any serialize / deserialize calls + // + // For lazy serialization backends: + // This should result in serializer personality RESTORE having 2 deserialize calls + backend = restoreKeyedBackend(IntSerializer.INSTANCE, snapshot1); + + if (getStateBackendSerializationTimeliness() == BackendSerializationTimeliness.ON_ACCESS) { + assertEquals(null, CustomStringSerializer.serializeCalled.get(SerializerVersion.INITIAL)); + assertEquals(null, CustomStringSerializer.deserializeCalled.get(SerializerVersion.INITIAL)); + } else { + assertEquals(null, CustomStringSerializer.serializeCalled.get(SerializerVersion.RESTORE)); + assertEquals((Integer) 2, CustomStringSerializer.deserializeCalled.get(SerializerVersion.RESTORE)); + } + CustomStringSerializer.resetCountingMaps(); + + ValueStateDescriptor<String> newKvId = new ValueStateDescriptor<>("id", + new CustomStringSerializer(SerializerCompatibilityType.REQUIRES_MIGRATION, SerializerVersion.NEW)); + + // ============ State registration that triggers state migration ============ + // For eager serialization backends: + // This should result in serializer 
personality RESTORE having 2 deserialize calls, and NEW having 2 serialize calls + // + // For lazy serialization backends: + // This should not result in any serialize / deserialize calls + ValueState<String> restored1 = backend.getPartitionedState(VoidNamespace.INSTANCE, CustomVoidNamespaceSerializer.INSTANCE, newKvId); + + if (getStateBackendSerializationTimeliness() == BackendSerializationTimeliness.ON_ACCESS) { + assertEquals(null, CustomStringSerializer.serializeCalled.get(SerializerVersion.RESTORE)); + assertEquals((Integer) 2, CustomStringSerializer.deserializeCalled.get(SerializerVersion.RESTORE)); + + assertEquals((Integer) 2, CustomStringSerializer.serializeCalled.get(SerializerVersion.NEW)); + assertEquals(null, CustomStringSerializer.deserializeCalled.get(SerializerVersion.NEW)); + } else { + assertEquals(null, CustomStringSerializer.serializeCalled.get(SerializerVersion.RESTORE)); + assertEquals(null, CustomStringSerializer.deserializeCalled.get(SerializerVersion.RESTORE)); + + assertEquals(null, CustomStringSerializer.serializeCalled.get(SerializerVersion.NEW)); + assertEquals(null, CustomStringSerializer.deserializeCalled.get(SerializerVersion.NEW)); + } + CustomStringSerializer.resetCountingMaps(); + + // ============ More modifications to the state ============ + // For eager serialization backends: + // This should result in serializer personality NEW having 2 serialize calls and 3 deserialize calls + // + // For lazy serialization backends: + // This should not result in any serialize / deserialize calls + backend.setCurrentKey(1); + assertEquals("1", restored1.value()); + restored1.update("1"); // s, NEW + backend.setCurrentKey(2); + assertEquals("2", restored1.value()); + restored1.update("3"); // s, NEW + assertEquals("3", restored1.value()); + + if (getStateBackendSerializationTimeliness() == BackendSerializationTimeliness.ON_ACCESS) { + assertEquals((Integer) 2, CustomStringSerializer.serializeCalled.get(SerializerVersion.NEW)); + 
assertEquals((Integer) 3, CustomStringSerializer.deserializeCalled.get(SerializerVersion.NEW)); + } else { + assertEquals(null, CustomStringSerializer.serializeCalled.get(SerializerVersion.NEW)); + assertEquals(null, CustomStringSerializer.deserializeCalled.get(SerializerVersion.NEW)); + } + CustomStringSerializer.resetCountingMaps(); + + // ============ Snapshot #2 ============ + // For eager serialization backends: + // This should not result in any serialize / deserialize calls + // + // For lazy serialization backends: + // This should result in serializer personality NEW having 2 serialize calls + KeyedStateHandle snapshot2 = runSnapshot( + backend.snapshot(2L, 3L, streamFactory, CheckpointOptions.forCheckpointWithDefaultLocation()), + sharedStateRegistry); + backend.dispose(); + + if (getStateBackendSerializationTimeliness() == BackendSerializationTimeliness.ON_ACCESS) { + assertEquals(null, CustomStringSerializer.serializeCalled.get(SerializerVersion.NEW)); + assertEquals(null, CustomStringSerializer.deserializeCalled.get(SerializerVersion.NEW)); + } else { + assertEquals((Integer) 2, CustomStringSerializer.serializeCalled.get(SerializerVersion.NEW)); + assertEquals(null, CustomStringSerializer.deserializeCalled.get(SerializerVersion.NEW)); + } + CustomStringSerializer.resetCountingMaps(); + + // and restore once with NEW from NEW so that we see a read using the NEW serializer + // on the file backend + ValueStateDescriptor<String> newKvId2 = new ValueStateDescriptor<>( + "id", + new CustomStringSerializer(SerializerCompatibilityType.COMPATIBLE_AS_IS, SerializerVersion.NEW)); + + // ============ Restore from snapshot #2 ============ + // For eager serialization backends: + // This should not result in any serialize / deserialize calls + // + // For lazy serialization backends: + // This should result in serializer personality RESTORE having 2 deserialize calls + backend = restoreKeyedBackend(IntSerializer.INSTANCE, snapshot2); + + if 
(getStateBackendSerializationTimeliness() == BackendSerializationTimeliness.ON_ACCESS) { + assertEquals(null, CustomStringSerializer.serializeCalled.get(SerializerVersion.RESTORE)); + assertEquals(null, CustomStringSerializer.deserializeCalled.get(SerializerVersion.RESTORE)); + } else { + assertEquals(null, CustomStringSerializer.serializeCalled.get(SerializerVersion.RESTORE)); + assertEquals((Integer) 2, CustomStringSerializer.deserializeCalled.get(SerializerVersion.RESTORE)); + } + CustomStringSerializer.resetCountingMaps(); + + backend.getPartitionedState(VoidNamespace.INSTANCE, CustomVoidNamespaceSerializer.INSTANCE, newKvId2); + snapshot2.discardState(); + snapshot1.discardState(); + + backend.dispose(); Review comment: In general, whenever an exception can be thrown before a backend is disposed, I would suggest disposing the backend in a `finally` clause, because a test that fails with an exception can otherwise leave native resources unclosed, which can corrupt/segfault the JVM if it is reused for more tests after the failure. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
