StefanRRichter commented on a change in pull request #6875: [FLINK-9808] [state 
backends] Migrate state when necessary in state backends
URL: https://github.com/apache/flink/pull/6875#discussion_r227722870
 
 

 ##########
 File path: 
flink-runtime/src/test/java/org/apache/flink/runtime/state/StateBackendMigrationTestBase.java
 ##########
 @@ -0,0 +1,780 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.runtime.state;
+
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.api.common.state.ValueState;
+import org.apache.flink.api.common.state.ValueStateDescriptor;
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.apache.flink.api.common.typeutils.TypeSerializerSchemaCompatibility;
+import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
+import org.apache.flink.api.common.typeutils.base.IntSerializer;
+import org.apache.flink.core.memory.DataInputView;
+import org.apache.flink.core.memory.DataOutputView;
+import org.apache.flink.runtime.checkpoint.CheckpointOptions;
+import org.apache.flink.runtime.checkpoint.StateObjectCollection;
+import org.apache.flink.runtime.execution.Environment;
+import org.apache.flink.runtime.operators.testutils.DummyEnvironment;
+import org.apache.flink.types.StringValue;
+import org.apache.flink.util.TestLogger;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.RunnableFuture;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for the {@link KeyedStateBackend} and {@link OperatorStateBackend} as 
produced
+ * by various {@link StateBackend}s.
+ */
+@SuppressWarnings("serial")
+public abstract class StateBackendMigrationTestBase<B extends 
AbstractStateBackend> extends TestLogger {
+
+       @Rule
+       public final ExpectedException expectedException = 
ExpectedException.none();
+
+       // lazily initialized stream storage
+       private CheckpointStorageLocation checkpointStorageLocation;
+
+       /**
+        * Different "personalities" of {@link CustomStringSerializer}. Instead 
of creating
+        * different classes we parameterize the serializer with this and
+        * {@link CustomStringSerializerSnapshot} will instantiate serializers 
with the correct
+        * personality.
+        */
+       public enum SerializerVersion {
+               INITIAL, // used by the test for the serializer the state is first registered with, before snapshot #1
+               RESTORE, // the personality the test expects to see deserializing previously snapshotted state
+               NEW // used by the test for the serializer the state is re-registered with after a restore
+       }
+
+       /**
+        * The compatibility behaviour of {@link CustomStringSerializer}. This 
controls what
+        * type of serializer {@link CustomStringSerializerSnapshot} will 
create for
+        * the different methods that return/create serializers.
+        */
+       public enum SerializerCompatibilityType {
+               COMPATIBLE_AS_IS, // used by the test for the first registration and for the final NEW-from-NEW restore
+               REQUIRES_MIGRATION // used by the test for the re-registration that is expected to trigger state migration
+       }
+
+       /**
+        * The serialization timeliness behaviour of the state backend under 
test.
+        */
+       public enum BackendSerializationTimeliness {
+               ON_ACCESS, // the test expects serializer calls on state reads/updates and none while snapshotting
+               ON_CHECKPOINTS // the test expects serializer calls on snapshot/restore and none on state reads/updates
+       }
+
+       @Test
+       @SuppressWarnings("unchecked")
+       public void testValueStateWithSerializerRequiringMigration() throws 
Exception {
+               CustomStringSerializer.resetCountingMaps();
+
+               CheckpointStreamFactory streamFactory = createStreamFactory();
+               SharedStateRegistry sharedStateRegistry = new 
SharedStateRegistry();
+               AbstractKeyedStateBackend<Integer> backend = 
createKeyedBackend(IntSerializer.INSTANCE);
+
+               ValueStateDescriptor<String> kvId = new ValueStateDescriptor<>(
+                       "id",
+                       new 
CustomStringSerializer(SerializerCompatibilityType.COMPATIBLE_AS_IS, 
SerializerVersion.INITIAL));
+               ValueState<String> state = 
backend.getPartitionedState(VoidNamespace.INSTANCE, 
CustomVoidNamespaceSerializer.INSTANCE, kvId);
+
+               // ============ Modifications to the state ============
+               //  For eager serialization backends:
+               //    This should result in serializer personality INITIAL 
having 2 serialize calls
+               //
+               //  For lazy serialization backends:
+               //    This should not result in any serialize / deserialize 
calls
+
+               backend.setCurrentKey(1);
+               state.update("1");
+               backend.setCurrentKey(2);
+               state.update("2");
+               backend.setCurrentKey(1);
+
+               if (getStateBackendSerializationTimeliness() == 
BackendSerializationTimeliness.ON_ACCESS) {
+                       assertEquals((Integer) 2, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.INITIAL));
+                       assertEquals(null, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.INITIAL));
+               } else {
+                       assertEquals(null, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.INITIAL));
+                       assertEquals(null, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.INITIAL));
+               }
+               CustomStringSerializer.resetCountingMaps();
+
+               // ============ Snapshot #1 ============
+               //  For eager serialization backends:
+               //    This should not result in any serialize / deserialize 
calls
+               //
+               //  For lazy serialization backends:
+               //    This should result in serializer personality INITIAL 
having 2 serialize calls
+               KeyedStateHandle snapshot1 = runSnapshot(
+                       backend.snapshot(1L, 2L, streamFactory, 
CheckpointOptions.forCheckpointWithDefaultLocation()),
+                       sharedStateRegistry);
+               backend.dispose();
+
+               if (getStateBackendSerializationTimeliness() == 
BackendSerializationTimeliness.ON_ACCESS) {
+                       assertEquals(null, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.INITIAL));
+                       assertEquals(null, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.INITIAL));
+               } else {
+                       assertEquals((Integer) 2, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.INITIAL));
+                       assertEquals(null, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.INITIAL));
+               }
+               CustomStringSerializer.resetCountingMaps();
+
+               // ============ Restore from snapshot #1 ============
+               //  For eager serialization backends:
+               //    This should not result in any serialize / deserialize 
calls
+               //
+               //  For lazy serialization backends:
+               //    This should result in serializer personality RESTORE 
having 2 deserialize calls
+               backend = restoreKeyedBackend(IntSerializer.INSTANCE, 
snapshot1);
+
+               if (getStateBackendSerializationTimeliness() == 
BackendSerializationTimeliness.ON_ACCESS) {
+                       assertEquals(null, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.INITIAL));
+                       assertEquals(null, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.INITIAL));
+               } else {
+                       assertEquals(null, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.RESTORE));
+                       assertEquals((Integer) 2, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.RESTORE));
+               }
+               CustomStringSerializer.resetCountingMaps();
+
+               ValueStateDescriptor<String> newKvId = new 
ValueStateDescriptor<>("id",
+                       new 
CustomStringSerializer(SerializerCompatibilityType.REQUIRES_MIGRATION, 
SerializerVersion.NEW));
+
+               // ============ State registration that triggers state 
migration ============
+               //  For eager serialization backends:
+               //    This should result in serializer personality RESTORE 
having 2 deserialize calls, and NEW having 2 serialize calls
+               //
+               //  For lazy serialization backends:
+               //    This should not result in any serialize / deserialize 
calls
+               ValueState<String> restored1 = 
backend.getPartitionedState(VoidNamespace.INSTANCE, 
CustomVoidNamespaceSerializer.INSTANCE, newKvId);
+
+               if (getStateBackendSerializationTimeliness() == 
BackendSerializationTimeliness.ON_ACCESS) {
+                       assertEquals(null, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.RESTORE));
+                       assertEquals((Integer) 2, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.RESTORE));
+
+                       assertEquals((Integer) 2, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.NEW));
+                       assertEquals(null, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.NEW));
+               } else {
+                       assertEquals(null, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.RESTORE));
+                       assertEquals(null, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.RESTORE));
+
+                       assertEquals(null, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.NEW));
+                       assertEquals(null, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.NEW));
+               }
+               CustomStringSerializer.resetCountingMaps();
+
+               // ============ More modifications to the state ============
+               //  For eager serialization backends:
+               //    This should result in serializer personality NEW having 2 
serialize calls and 3 deserialize calls
+               //
+               //  For lazy serialization backends:
+               //    This should not result in any serialize / deserialize 
calls
+               backend.setCurrentKey(1);
+               assertEquals("1", restored1.value());
+               restored1.update("1"); // s, NEW
+               backend.setCurrentKey(2);
+               assertEquals("2", restored1.value());
+               restored1.update("3"); // s, NEW
+               assertEquals("3", restored1.value());
+
+               if (getStateBackendSerializationTimeliness() == 
BackendSerializationTimeliness.ON_ACCESS) {
+                       assertEquals((Integer) 2, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.NEW));
+                       assertEquals((Integer) 3, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.NEW));
+               } else {
+                       assertEquals(null, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.NEW));
+                       assertEquals(null, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.NEW));
+               }
+               CustomStringSerializer.resetCountingMaps();
+
+               // ============ Snapshot #2 ============
+               //  For eager serialization backends:
+               //    This should not result in any serialize / deserialize 
calls
+               //
+               //  For lazy serialization backends:
+               //    This should result in serializer personality NEW having 2 
serialize calls
+               KeyedStateHandle snapshot2 = runSnapshot(
+                       backend.snapshot(2L, 3L, streamFactory, 
CheckpointOptions.forCheckpointWithDefaultLocation()),
+                       sharedStateRegistry);
+               backend.dispose();
+
+               if (getStateBackendSerializationTimeliness() == 
BackendSerializationTimeliness.ON_ACCESS) {
+                       assertEquals(null, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.NEW));
+                       assertEquals(null, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.NEW));
+               } else {
+                       assertEquals((Integer) 2, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.NEW));
+                       assertEquals(null, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.NEW));
+               }
+               CustomStringSerializer.resetCountingMaps();
+
+               // and restore once with NEW from NEW so that we see a read 
using the NEW serializer
+               // on the file backend
+               ValueStateDescriptor<String> newKvId2 = new 
ValueStateDescriptor<>(
+                       "id",
+                       new 
CustomStringSerializer(SerializerCompatibilityType.COMPATIBLE_AS_IS, 
SerializerVersion.NEW));
+
+               // ============ Restore from snapshot #2 ============
+               //  For eager serialization backends:
+               //    This should not result in any serialize / deserialize 
calls
+               //
+               //  For lazy serialization backends:
+               //    This should result in serializer personality RESTORE 
having 2 deserialize calls
+               backend = restoreKeyedBackend(IntSerializer.INSTANCE, 
snapshot2);
+
+               if (getStateBackendSerializationTimeliness() == 
BackendSerializationTimeliness.ON_ACCESS) {
+                       assertEquals(null, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.RESTORE));
+                       assertEquals(null, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.RESTORE));
+               } else {
+                       assertEquals(null, 
CustomStringSerializer.serializeCalled.get(SerializerVersion.RESTORE));
+                       assertEquals((Integer) 2, 
CustomStringSerializer.deserializeCalled.get(SerializerVersion.RESTORE));
+               }
+               CustomStringSerializer.resetCountingMaps();
+
+               backend.getPartitionedState(VoidNamespace.INSTANCE, 
CustomVoidNamespaceSerializer.INSTANCE, newKvId2);
+               snapshot2.discardState();
+               snapshot1.discardState();
+
+               backend.dispose();
 
 Review comment:
   In general, whenever an exception can happen before a backend is disposed, I 
would suggest disposing it in a `finally` clause, because a test that fails with 
an exception can leave unclosed native resources that corrupt/segfault the JVM 
if it is reused for more tests after the failure.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to