scwhittle commented on code in PR #37151: URL: https://github.com/apache/beam/pull/37151#discussion_r2636524448
########## runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/state/WindmillTagEncodingV2.java: ########## @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.dataflow.worker.windmill.state; + +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; + +import java.io.IOException; +import java.io.InputStream; +import javax.annotation.concurrent.ThreadSafe; +import org.apache.beam.runners.core.StateNamespace; +import org.apache.beam.runners.core.StateNamespaces; +import org.apache.beam.runners.core.StateNamespaces.GlobalNamespace; +import org.apache.beam.runners.core.StateNamespaces.WindowAndTriggerNamespace; +import org.apache.beam.runners.core.StateNamespaces.WindowNamespace; +import org.apache.beam.runners.core.StateTag; +import org.apache.beam.runners.core.StateTags; +import org.apache.beam.runners.core.TimerInternals.TimerData; +import org.apache.beam.runners.dataflow.worker.WindmillNamespacePrefix; +import org.apache.beam.runners.dataflow.worker.WindmillTimeUtils; +import org.apache.beam.runners.dataflow.worker.util.ThreadLocalByteStringOutputStream; +import org.apache.beam.runners.dataflow.worker.util.ThreadLocalByteStringOutputStream.StreamHandle; +import org.apache.beam.runners.dataflow.worker.util.common.worker.InternedByteString; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill.Timer; +import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.coders.BigEndianIntegerCoder; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.InstantCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder; +import org.apache.beam.sdk.util.ByteStringOutputStream; +import org.apache.beam.vendor.grpc.v1p69p0.com.google.protobuf.ByteString; +import org.joda.time.Instant; + +/** Encodes and decodes StateTags and TimerTags from and to windmill bytes */ +@Internal +@ThreadSafe +public class WindmillTagEncodingV2 extends WindmillTagEncoding { + + private static final WindmillTagEncodingV2 INSTANCE = new WindmillTagEncodingV2(); + private static final int WINDOW_NAMESPACE_BYTE = 0x01; + private static final int WINDOW_AND_TRIGGER_NAMESPACE_BYTE = 0x02; + private static final int NON_GLOBAL_NAMESPACE_BYTE = 0x10; + private static final int GLOBAL_NAMESPACE_BYTE = 0x01; + private static final int SYSTEM_STATE_TAG_BYTE = 0x01; + private static final int USER_STATE_TAG_BYTE = 0x02; + private static final int SYSTEM_TIMER_BYTE = 0x03; + private static final int USER_TIMER_BYTE = 0x04; + private static final int INTERVAL_WINDOW_BYTE = 0x64; + private static final int OTHER_WINDOW_BYTE = 0x02; + + // Private constructor to prevent instantiations from outside. + private WindmillTagEncodingV2() {} + + /** {@inheritDoc} */ + @Override + public InternedByteString stateTag(StateNamespace namespace, StateTag<?> address) { + try (StreamHandle streamHandle = ThreadLocalByteStringOutputStream.acquire()) { + ByteStringOutputStream stream = streamHandle.stream(); + encodeNameSpace(namespace, stream); + encodeAddress(address, stream); + return InternedByteString.of(stream.toByteStringAndReset()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** {@inheritDoc} */ + @Override + public ByteString timerHoldTag( + WindmillNamespacePrefix prefix, TimerData timerData, ByteString timerTag) { + // Same encoding for timer tag and timer hold tag. + // They are put in different places and won't collide. + return timerTag; + } + + /** {@inheritDoc} */ + @Override + public ByteString timerTag(WindmillNamespacePrefix prefix, TimerData timerData) { + try (StreamHandle streamHandle = ThreadLocalByteStringOutputStream.acquire()) { + ByteStringOutputStream stream = streamHandle.stream(); + encodeNameSpace(timerData.getNamespace(), stream); + if (WindmillNamespacePrefix.SYSTEM_NAMESPACE_PREFIX.equals(prefix)) { + stream.write(SYSTEM_TIMER_BYTE); + } else if (WindmillNamespacePrefix.USER_NAMESPACE_PREFIX.equals(prefix)) { + stream.write(USER_TIMER_BYTE); + } else { + throw new IllegalStateException("Unexpected WindmillNamespacePrefix" + prefix); + } + StringUtf8Coder.of().encode(timerData.getTimerFamilyId(), stream); + StringUtf8Coder.of().encode(timerData.getTimerId(), stream); + return stream.toByteStringAndReset(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** {@inheritDoc} */ + @Override + public TimerData windmillTimerToTimerData( + WindmillNamespacePrefix prefix, + Timer timer, + Coder<? extends BoundedWindow> windowCoder, + boolean draining) { + + InputStream stream = timer.getTag().newInput(); + + try { + StateNamespace stateNamespace = decodeNameSpace(stream, windowCoder); + int nextByte = stream.read(); + if (nextByte == SYSTEM_TIMER_BYTE) { + checkState(WindmillNamespacePrefix.SYSTEM_NAMESPACE_PREFIX.equals(prefix)); + } else if (nextByte == USER_TIMER_BYTE) { + checkState(WindmillNamespacePrefix.USER_NAMESPACE_PREFIX.equals(prefix)); + } else { + throw new IllegalStateException("Unexpected timer tag byte: " + nextByte); + } + + String timerFamilyId = StringUtf8Coder.of().decode(stream); + String timerId = StringUtf8Coder.of().decode(stream); + + Instant timestamp = WindmillTimeUtils.windmillToHarnessTimestamp(timer.getTimestamp()); + Instant outputTimestamp = timestamp; + if (timer.hasMetadataTimestamp()) { + // We use BoundedWindow.TIMESTAMP_MAX_VALUE+1 to indicate "no output timestamp" so make sure + // to change the upper bound. + outputTimestamp = + WindmillTimeUtils.windmillToHarnessTimestamp(timer.getMetadataTimestamp()); + if (outputTimestamp.equals(OUTPUT_TIMESTAMP_MAX_WINDMILL_VALUE)) { + outputTimestamp = OUTPUT_TIMESTAMP_MAX_VALUE; + } + } + + return TimerData.of( + timerId, + timerFamilyId, + stateNamespace, + timestamp, + outputTimestamp, + timerTypeToTimeDomain(timer.getType())); + + } catch (IOException e) { + throw new RuntimeException(e); + } + // todo add draining (https://github.com/apache/beam/issues/36884) + } + + /** @return the singleton WindmillStateTagUtil */ + public static WindmillTagEncodingV2 instance() { + return INSTANCE; + } + + private void encodeAddress(StateTag<?> tag, ByteStringOutputStream stream) throws IOException { + if (StateTags.isSystemTagInternal(tag)) { + stream.write(SYSTEM_STATE_TAG_BYTE); // System tag Review Comment: comment not adding much ########## runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/state/WindmillTagEncodingV2.java: ########## @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.dataflow.worker.windmill.state; + +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; + +import java.io.IOException; +import java.io.InputStream; +import javax.annotation.concurrent.ThreadSafe; +import org.apache.beam.runners.core.StateNamespace; +import org.apache.beam.runners.core.StateNamespaces; +import org.apache.beam.runners.core.StateNamespaces.GlobalNamespace; +import org.apache.beam.runners.core.StateNamespaces.WindowAndTriggerNamespace; +import org.apache.beam.runners.core.StateNamespaces.WindowNamespace; +import org.apache.beam.runners.core.StateTag; +import org.apache.beam.runners.core.StateTags; +import org.apache.beam.runners.core.TimerInternals.TimerData; +import org.apache.beam.runners.dataflow.worker.WindmillNamespacePrefix; +import org.apache.beam.runners.dataflow.worker.WindmillTimeUtils; +import org.apache.beam.runners.dataflow.worker.util.ThreadLocalByteStringOutputStream; +import org.apache.beam.runners.dataflow.worker.util.ThreadLocalByteStringOutputStream.StreamHandle; +import org.apache.beam.runners.dataflow.worker.util.common.worker.InternedByteString; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill.Timer; +import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.coders.BigEndianIntegerCoder; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.InstantCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder; +import org.apache.beam.sdk.util.ByteStringOutputStream; +import org.apache.beam.vendor.grpc.v1p69p0.com.google.protobuf.ByteString; +import org.joda.time.Instant; + +/** Encodes and decodes StateTags and TimerTags from and to windmill bytes */ +@Internal +@ThreadSafe +public class WindmillTagEncodingV2 extends WindmillTagEncoding { + + private static final WindmillTagEncodingV2 INSTANCE = new WindmillTagEncodingV2(); + private static final int WINDOW_NAMESPACE_BYTE = 0x01; + private static final int WINDOW_AND_TRIGGER_NAMESPACE_BYTE = 0x02; + private static final int NON_GLOBAL_NAMESPACE_BYTE = 0x10; + private static final int GLOBAL_NAMESPACE_BYTE = 0x01; + private static final int SYSTEM_STATE_TAG_BYTE = 0x01; + private static final int USER_STATE_TAG_BYTE = 0x02; + private static final int SYSTEM_TIMER_BYTE = 0x03; + private static final int USER_TIMER_BYTE = 0x04; + private static final int INTERVAL_WINDOW_BYTE = 0x64; + private static final int OTHER_WINDOW_BYTE = 0x02; + + // Private constructor to prevent instantiations from outside. + private WindmillTagEncodingV2() {} + + /** {@inheritDoc} */ + @Override + public InternedByteString stateTag(StateNamespace namespace, StateTag<?> address) { + try (StreamHandle streamHandle = ThreadLocalByteStringOutputStream.acquire()) { + ByteStringOutputStream stream = streamHandle.stream(); + encodeNameSpace(namespace, stream); + encodeAddress(address, stream); + return InternedByteString.of(stream.toByteStringAndReset()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** {@inheritDoc} */ + @Override + public ByteString timerHoldTag( + WindmillNamespacePrefix prefix, TimerData timerData, ByteString timerTag) { + // Same encoding for timer tag and timer hold tag. + // They are put in different places and won't collide. + return timerTag; + } + + /** {@inheritDoc} */ + @Override + public ByteString timerTag(WindmillNamespacePrefix prefix, TimerData timerData) { + try (StreamHandle streamHandle = ThreadLocalByteStringOutputStream.acquire()) { + ByteStringOutputStream stream = streamHandle.stream(); + encodeNameSpace(timerData.getNamespace(), stream); + if (WindmillNamespacePrefix.SYSTEM_NAMESPACE_PREFIX.equals(prefix)) { + stream.write(SYSTEM_TIMER_BYTE); + } else if (WindmillNamespacePrefix.USER_NAMESPACE_PREFIX.equals(prefix)) { + stream.write(USER_TIMER_BYTE); + } else { + throw new IllegalStateException("Unexpected WindmillNamespacePrefix" + prefix); + } + StringUtf8Coder.of().encode(timerData.getTimerFamilyId(), stream); + StringUtf8Coder.of().encode(timerData.getTimerId(), stream); + return stream.toByteStringAndReset(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** {@inheritDoc} */ + @Override + public TimerData windmillTimerToTimerData( + WindmillNamespacePrefix prefix, + Timer timer, + Coder<? extends BoundedWindow> windowCoder, + boolean draining) { + + InputStream stream = timer.getTag().newInput(); + + try { + StateNamespace stateNamespace = decodeNameSpace(stream, windowCoder); + int nextByte = stream.read(); + if (nextByte == SYSTEM_TIMER_BYTE) { + checkState(WindmillNamespacePrefix.SYSTEM_NAMESPACE_PREFIX.equals(prefix)); + } else if (nextByte == USER_TIMER_BYTE) { + checkState(WindmillNamespacePrefix.USER_NAMESPACE_PREFIX.equals(prefix)); + } else { + throw new IllegalStateException("Unexpected timer tag byte: " + nextByte); + } + + String timerFamilyId = StringUtf8Coder.of().decode(stream); + String timerId = StringUtf8Coder.of().decode(stream); + + Instant timestamp = WindmillTimeUtils.windmillToHarnessTimestamp(timer.getTimestamp()); + Instant outputTimestamp = timestamp; + if (timer.hasMetadataTimestamp()) { + // We use BoundedWindow.TIMESTAMP_MAX_VALUE+1 to indicate "no output timestamp" so make sure + // to change the upper bound. + outputTimestamp = + WindmillTimeUtils.windmillToHarnessTimestamp(timer.getMetadataTimestamp()); + if (outputTimestamp.equals(OUTPUT_TIMESTAMP_MAX_WINDMILL_VALUE)) { + outputTimestamp = OUTPUT_TIMESTAMP_MAX_VALUE; + } + } + + return TimerData.of( + timerId, + timerFamilyId, + stateNamespace, + timestamp, + outputTimestamp, + timerTypeToTimeDomain(timer.getType())); + + } catch (IOException e) { + throw new RuntimeException(e); + } + // todo add draining (https://github.com/apache/beam/issues/36884) + } + + /** @return the singleton WindmillStateTagUtil */ + public static WindmillTagEncodingV2 instance() { + return INSTANCE; + } + + private void encodeAddress(StateTag<?> tag, ByteStringOutputStream stream) throws IOException { + if (StateTags.isSystemTagInternal(tag)) { + stream.write(SYSTEM_STATE_TAG_BYTE); // System tag + } else { + stream.write(USER_STATE_TAG_BYTE); // User tag + } + StringUtf8Coder.of().encode(tag.getId(), stream); + } + + private void encodeNameSpace(StateNamespace namespace, ByteStringOutputStream stream) + throws IOException { + if (namespace instanceof GlobalNamespace) { + stream.write(GLOBAL_NAMESPACE_BYTE); + } else if (namespace instanceof WindowNamespace) { + stream.write(NON_GLOBAL_NAMESPACE_BYTE); + encodeWindowNamespace((WindowNamespace<? extends BoundedWindow>) namespace, stream); + } else if (namespace instanceof WindowAndTriggerNamespace) { + stream.write(NON_GLOBAL_NAMESPACE_BYTE); + encodeWindowAndTriggerNamespace( + (WindowAndTriggerNamespace<? extends BoundedWindow>) namespace, stream); + } else { + throw new IllegalStateException("Unsupported namespace type: " + namespace.getClass()); + } + } + + private StateNamespace decodeNameSpace( + InputStream stream, Coder<? extends BoundedWindow> windowCoder) throws IOException { + int firstByte = stream.read(); + switch (firstByte) { + case GLOBAL_NAMESPACE_BYTE: // GlobalNamespace + return StateNamespaces.global(); + case NON_GLOBAL_NAMESPACE_BYTE: // Non-Global namespace + return decodeNonGlobalNamespace(stream, windowCoder); + default: + throw new IllegalStateException("Invalid first namespace byte: " + firstByte); + } + } + + private <W extends BoundedWindow> StateNamespace decodeNonGlobalNamespace( + InputStream stream, Coder<W> windowCoder) throws IOException { + W window = decodeWindow(stream, windowCoder); + int namespaceByte = stream.read(); + switch (namespaceByte) { + case WINDOW_NAMESPACE_BYTE: // Window namespace + return StateNamespaces.window(windowCoder, window); + case WINDOW_AND_TRIGGER_NAMESPACE_BYTE: // Window and trigger namespace + Integer triggerIndex = BigEndianIntegerCoder.of().decode(stream); + return StateNamespaces.windowAndTrigger(windowCoder, window, triggerIndex); + default: + throw new IllegalStateException("Invalid trigger namespace byte: " + namespaceByte); + } + } + + private <W extends BoundedWindow> W decodeWindow(InputStream stream, Coder<W> windowCoder) Review Comment: does this need a template or can it just return a BoundedWindow? ########## runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/state/WindmillTagEncodingV2.java: ########## @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.dataflow.worker.windmill.state; + +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; + +import java.io.IOException; +import java.io.InputStream; +import javax.annotation.concurrent.ThreadSafe; +import org.apache.beam.runners.core.StateNamespace; +import org.apache.beam.runners.core.StateNamespaces; +import org.apache.beam.runners.core.StateNamespaces.GlobalNamespace; +import org.apache.beam.runners.core.StateNamespaces.WindowAndTriggerNamespace; +import org.apache.beam.runners.core.StateNamespaces.WindowNamespace; +import org.apache.beam.runners.core.StateTag; +import org.apache.beam.runners.core.StateTags; +import org.apache.beam.runners.core.TimerInternals.TimerData; +import org.apache.beam.runners.dataflow.worker.WindmillNamespacePrefix; +import org.apache.beam.runners.dataflow.worker.WindmillTimeUtils; +import org.apache.beam.runners.dataflow.worker.util.ThreadLocalByteStringOutputStream; +import org.apache.beam.runners.dataflow.worker.util.ThreadLocalByteStringOutputStream.StreamHandle; +import org.apache.beam.runners.dataflow.worker.util.common.worker.InternedByteString; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill.Timer; +import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.coders.BigEndianIntegerCoder; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.InstantCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder; +import org.apache.beam.sdk.util.ByteStringOutputStream; +import org.apache.beam.vendor.grpc.v1p69p0.com.google.protobuf.ByteString; +import org.joda.time.Instant; + +/** Encodes and decodes StateTags and TimerTags from and to windmill bytes */ +@Internal +@ThreadSafe +public class WindmillTagEncodingV2 extends WindmillTagEncoding { + + private static final WindmillTagEncodingV2 INSTANCE = new WindmillTagEncodingV2(); Review Comment: I think an ascii table of the table within the doc showing how tags are constructed would be useful as a comment here. ########## runners/google-cloud-dataflow-java/worker/src/test/java/org/apache/beam/runners/dataflow/worker/windmill/state/WindmillTagEncodingV2Test.java: ########## @@ -0,0 +1,514 @@ +/* Review Comment: add tests about ordering, since that is the motivation for the v2? ########## runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/state/WindmillTagEncodingV2.java: ########## @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.dataflow.worker.windmill.state; + +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; + +import java.io.IOException; +import java.io.InputStream; +import javax.annotation.concurrent.ThreadSafe; +import org.apache.beam.runners.core.StateNamespace; +import org.apache.beam.runners.core.StateNamespaces; +import org.apache.beam.runners.core.StateNamespaces.GlobalNamespace; +import org.apache.beam.runners.core.StateNamespaces.WindowAndTriggerNamespace; +import org.apache.beam.runners.core.StateNamespaces.WindowNamespace; +import org.apache.beam.runners.core.StateTag; +import org.apache.beam.runners.core.StateTags; +import org.apache.beam.runners.core.TimerInternals.TimerData; +import org.apache.beam.runners.dataflow.worker.WindmillNamespacePrefix; +import org.apache.beam.runners.dataflow.worker.WindmillTimeUtils; +import org.apache.beam.runners.dataflow.worker.util.ThreadLocalByteStringOutputStream; +import org.apache.beam.runners.dataflow.worker.util.ThreadLocalByteStringOutputStream.StreamHandle; +import org.apache.beam.runners.dataflow.worker.util.common.worker.InternedByteString; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill.Timer; +import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.coders.BigEndianIntegerCoder; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.InstantCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder; +import org.apache.beam.sdk.util.ByteStringOutputStream; +import org.apache.beam.vendor.grpc.v1p69p0.com.google.protobuf.ByteString; +import org.joda.time.Instant; + +/** Encodes and decodes StateTags and TimerTags from and to windmill bytes */ +@Internal +@ThreadSafe +public class WindmillTagEncodingV2 extends WindmillTagEncoding { + + private static final WindmillTagEncodingV2 INSTANCE = new WindmillTagEncodingV2(); + private static final int WINDOW_NAMESPACE_BYTE = 0x01; + private static final int WINDOW_AND_TRIGGER_NAMESPACE_BYTE = 0x02; + private static final int NON_GLOBAL_NAMESPACE_BYTE = 0x10; + private static final int GLOBAL_NAMESPACE_BYTE = 0x01; + private static final int SYSTEM_STATE_TAG_BYTE = 0x01; + private static final int USER_STATE_TAG_BYTE = 0x02; + private static final int SYSTEM_TIMER_BYTE = 0x03; + private static final int USER_TIMER_BYTE = 0x04; + private static final int INTERVAL_WINDOW_BYTE = 0x64; + private static final int OTHER_WINDOW_BYTE = 0x02; + + // Private constructor to prevent instantiations from outside. + private WindmillTagEncodingV2() {} + + /** {@inheritDoc} */ + @Override + public InternedByteString stateTag(StateNamespace namespace, StateTag<?> address) { + try (StreamHandle streamHandle = ThreadLocalByteStringOutputStream.acquire()) { + ByteStringOutputStream stream = streamHandle.stream(); + encodeNameSpace(namespace, stream); + encodeAddress(address, stream); + return InternedByteString.of(stream.toByteStringAndReset()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** {@inheritDoc} */ + @Override + public ByteString timerHoldTag( + WindmillNamespacePrefix prefix, TimerData timerData, ByteString timerTag) { + // Same encoding for timer tag and timer hold tag. + // They are put in different places and won't collide. + return timerTag; + } + + /** {@inheritDoc} */ + @Override + public ByteString timerTag(WindmillNamespacePrefix prefix, TimerData timerData) { + try (StreamHandle streamHandle = ThreadLocalByteStringOutputStream.acquire()) { + ByteStringOutputStream stream = streamHandle.stream(); + encodeNameSpace(timerData.getNamespace(), stream); + if (WindmillNamespacePrefix.SYSTEM_NAMESPACE_PREFIX.equals(prefix)) { + stream.write(SYSTEM_TIMER_BYTE); + } else if (WindmillNamespacePrefix.USER_NAMESPACE_PREFIX.equals(prefix)) { + stream.write(USER_TIMER_BYTE); + } else { + throw new IllegalStateException("Unexpected WindmillNamespacePrefix" + prefix); + } + StringUtf8Coder.of().encode(timerData.getTimerFamilyId(), stream); + StringUtf8Coder.of().encode(timerData.getTimerId(), stream); + return stream.toByteStringAndReset(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** {@inheritDoc} */ + @Override + public TimerData windmillTimerToTimerData( + WindmillNamespacePrefix prefix, + Timer timer, + Coder<? extends BoundedWindow> windowCoder, + boolean draining) { + + InputStream stream = timer.getTag().newInput(); + + try { + StateNamespace stateNamespace = decodeNameSpace(stream, windowCoder); + int nextByte = stream.read(); + if (nextByte == SYSTEM_TIMER_BYTE) { + checkState(WindmillNamespacePrefix.SYSTEM_NAMESPACE_PREFIX.equals(prefix)); + } else if (nextByte == USER_TIMER_BYTE) { + checkState(WindmillNamespacePrefix.USER_NAMESPACE_PREFIX.equals(prefix)); + } else { + throw new IllegalStateException("Unexpected timer tag byte: " + nextByte); + } + + String timerFamilyId = StringUtf8Coder.of().decode(stream); + String timerId = StringUtf8Coder.of().decode(stream); + + Instant timestamp = WindmillTimeUtils.windmillToHarnessTimestamp(timer.getTimestamp()); + Instant outputTimestamp = timestamp; + if (timer.hasMetadataTimestamp()) { + // We use BoundedWindow.TIMESTAMP_MAX_VALUE+1 to indicate "no output timestamp" so make sure + // to change the upper bound. + outputTimestamp = + WindmillTimeUtils.windmillToHarnessTimestamp(timer.getMetadataTimestamp()); + if (outputTimestamp.equals(OUTPUT_TIMESTAMP_MAX_WINDMILL_VALUE)) { + outputTimestamp = OUTPUT_TIMESTAMP_MAX_VALUE; + } + } + + return TimerData.of( + timerId, + timerFamilyId, + stateNamespace, + timestamp, + outputTimestamp, + timerTypeToTimeDomain(timer.getType())); + + } catch (IOException e) { + throw new RuntimeException(e); + } + // todo add draining (https://github.com/apache/beam/issues/36884) + } + + /** @return the singleton WindmillStateTagUtil */ + public static WindmillTagEncodingV2 instance() { + return INSTANCE; + } + + private void encodeAddress(StateTag<?> tag, ByteStringOutputStream stream) throws IOException { + if (StateTags.isSystemTagInternal(tag)) { + stream.write(SYSTEM_STATE_TAG_BYTE); // System tag + } else { + stream.write(USER_STATE_TAG_BYTE); // User tag + } + StringUtf8Coder.of().encode(tag.getId(), stream); + } + + private void encodeNameSpace(StateNamespace namespace, ByteStringOutputStream stream) + throws IOException { + if (namespace instanceof GlobalNamespace) { + stream.write(GLOBAL_NAMESPACE_BYTE); + } else if (namespace instanceof WindowNamespace) { + stream.write(NON_GLOBAL_NAMESPACE_BYTE); + encodeWindowNamespace((WindowNamespace<? extends BoundedWindow>) namespace, stream); + } else if (namespace instanceof WindowAndTriggerNamespace) { + stream.write(NON_GLOBAL_NAMESPACE_BYTE); + encodeWindowAndTriggerNamespace( + (WindowAndTriggerNamespace<? extends BoundedWindow>) namespace, stream); + } else { + throw new IllegalStateException("Unsupported namespace type: " + namespace.getClass()); + } + } + + private StateNamespace decodeNameSpace( + InputStream stream, Coder<? extends BoundedWindow> windowCoder) throws IOException { + int firstByte = stream.read(); + switch (firstByte) { + case GLOBAL_NAMESPACE_BYTE: // GlobalNamespace + return StateNamespaces.global(); + case NON_GLOBAL_NAMESPACE_BYTE: // Non-Global namespace + return decodeNonGlobalNamespace(stream, windowCoder); + default: + throw new IllegalStateException("Invalid first namespace byte: " + firstByte); + } + } + + private <W extends BoundedWindow> StateNamespace decodeNonGlobalNamespace( + InputStream stream, Coder<W> windowCoder) throws IOException { + W window = decodeWindow(stream, windowCoder); + int namespaceByte = stream.read(); + switch (namespaceByte) { + case WINDOW_NAMESPACE_BYTE: // Window namespace Review Comment: ditto ########## runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/state/WindmillTagEncodingV2.java: ########## @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.dataflow.worker.windmill.state; + +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; + +import java.io.IOException; +import java.io.InputStream; +import javax.annotation.concurrent.ThreadSafe; +import org.apache.beam.runners.core.StateNamespace; +import org.apache.beam.runners.core.StateNamespaces; +import org.apache.beam.runners.core.StateNamespaces.GlobalNamespace; +import org.apache.beam.runners.core.StateNamespaces.WindowAndTriggerNamespace; +import org.apache.beam.runners.core.StateNamespaces.WindowNamespace; +import org.apache.beam.runners.core.StateTag; +import org.apache.beam.runners.core.StateTags; +import org.apache.beam.runners.core.TimerInternals.TimerData; +import org.apache.beam.runners.dataflow.worker.WindmillNamespacePrefix; +import org.apache.beam.runners.dataflow.worker.WindmillTimeUtils; +import org.apache.beam.runners.dataflow.worker.util.ThreadLocalByteStringOutputStream; +import org.apache.beam.runners.dataflow.worker.util.ThreadLocalByteStringOutputStream.StreamHandle; +import org.apache.beam.runners.dataflow.worker.util.common.worker.InternedByteString; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill.Timer; +import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.coders.BigEndianIntegerCoder; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.InstantCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder; +import org.apache.beam.sdk.util.ByteStringOutputStream; +import org.apache.beam.vendor.grpc.v1p69p0.com.google.protobuf.ByteString; +import org.joda.time.Instant; + +/** Encodes and decodes StateTags and TimerTags from and to windmill bytes */ +@Internal +@ThreadSafe +public class WindmillTagEncodingV2 extends WindmillTagEncoding { + + private static final WindmillTagEncodingV2 INSTANCE = new WindmillTagEncodingV2(); + private static final int WINDOW_NAMESPACE_BYTE = 0x01; + private static final int WINDOW_AND_TRIGGER_NAMESPACE_BYTE = 0x02; + private static final int NON_GLOBAL_NAMESPACE_BYTE = 0x10; + private static final int GLOBAL_NAMESPACE_BYTE = 0x01; + private static final int SYSTEM_STATE_TAG_BYTE = 0x01; + private static final int USER_STATE_TAG_BYTE = 0x02; + private static final int SYSTEM_TIMER_BYTE = 0x03; + private static final int USER_TIMER_BYTE = 0x04; + private static final int INTERVAL_WINDOW_BYTE = 0x64; + private static final int OTHER_WINDOW_BYTE = 0x02; + + // Private constructor to prevent instantiations from outside. + private WindmillTagEncodingV2() {} + + /** {@inheritDoc} */ + @Override + public InternedByteString stateTag(StateNamespace namespace, StateTag<?> address) { + try (StreamHandle streamHandle = ThreadLocalByteStringOutputStream.acquire()) { + ByteStringOutputStream stream = streamHandle.stream(); + encodeNameSpace(namespace, stream); + encodeAddress(address, stream); + return InternedByteString.of(stream.toByteStringAndReset()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** {@inheritDoc} */ + @Override + public ByteString timerHoldTag( + WindmillNamespacePrefix prefix, TimerData timerData, ByteString timerTag) { + // Same encoding for timer tag and timer hold tag. + // They are put in different places and won't collide. + return timerTag; + } + + /** {@inheritDoc} */ + @Override + public ByteString timerTag(WindmillNamespacePrefix prefix, TimerData timerData) { + try (StreamHandle streamHandle = ThreadLocalByteStringOutputStream.acquire()) { + ByteStringOutputStream stream = streamHandle.stream(); + encodeNameSpace(timerData.getNamespace(), stream); + if (WindmillNamespacePrefix.SYSTEM_NAMESPACE_PREFIX.equals(prefix)) { + stream.write(SYSTEM_TIMER_BYTE); + } else if (WindmillNamespacePrefix.USER_NAMESPACE_PREFIX.equals(prefix)) { + stream.write(USER_TIMER_BYTE); + } else { + throw new IllegalStateException("Unexpected WindmillNamespacePrefix" + prefix); + } + StringUtf8Coder.of().encode(timerData.getTimerFamilyId(), stream); + StringUtf8Coder.of().encode(timerData.getTimerId(), stream); + return stream.toByteStringAndReset(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** {@inheritDoc} */ + @Override + public TimerData windmillTimerToTimerData( + WindmillNamespacePrefix prefix, + Timer timer, + Coder<? extends BoundedWindow> windowCoder, + boolean draining) { + + InputStream stream = timer.getTag().newInput(); + + try { + StateNamespace stateNamespace = decodeNameSpace(stream, windowCoder); + int nextByte = stream.read(); + if (nextByte == SYSTEM_TIMER_BYTE) { + checkState(WindmillNamespacePrefix.SYSTEM_NAMESPACE_PREFIX.equals(prefix)); + } else if (nextByte == USER_TIMER_BYTE) { + checkState(WindmillNamespacePrefix.USER_NAMESPACE_PREFIX.equals(prefix)); + } else { + throw new IllegalStateException("Unexpected timer tag byte: " + nextByte); + } + + String timerFamilyId = StringUtf8Coder.of().decode(stream); + String timerId = StringUtf8Coder.of().decode(stream); + + Instant timestamp = WindmillTimeUtils.windmillToHarnessTimestamp(timer.getTimestamp()); + Instant outputTimestamp = timestamp; + if (timer.hasMetadataTimestamp()) { + // We use BoundedWindow.TIMESTAMP_MAX_VALUE+1 to indicate "no output timestamp" so make sure + // to change the upper bound. + outputTimestamp = + WindmillTimeUtils.windmillToHarnessTimestamp(timer.getMetadataTimestamp()); + if (outputTimestamp.equals(OUTPUT_TIMESTAMP_MAX_WINDMILL_VALUE)) { + outputTimestamp = OUTPUT_TIMESTAMP_MAX_VALUE; + } + } + + return TimerData.of( + timerId, + timerFamilyId, + stateNamespace, + timestamp, + outputTimestamp, + timerTypeToTimeDomain(timer.getType())); + + } catch (IOException e) { + throw new RuntimeException(e); + } + // todo add draining (https://github.com/apache/beam/issues/36884) + } + + /** @return the singleton WindmillStateTagUtil */ + public static WindmillTagEncodingV2 instance() { + return INSTANCE; + } + + private void encodeAddress(StateTag<?> tag, ByteStringOutputStream stream) throws IOException { + if (StateTags.isSystemTagInternal(tag)) { + stream.write(SYSTEM_STATE_TAG_BYTE); // System tag + } else { + stream.write(USER_STATE_TAG_BYTE); // User tag + } + StringUtf8Coder.of().encode(tag.getId(), stream); + } + + private void encodeNameSpace(StateNamespace namespace, ByteStringOutputStream stream) + throws IOException { + if (namespace instanceof GlobalNamespace) { + stream.write(GLOBAL_NAMESPACE_BYTE); + } else if (namespace instanceof WindowNamespace) { + stream.write(NON_GLOBAL_NAMESPACE_BYTE); + encodeWindowNamespace((WindowNamespace<? extends BoundedWindow>) namespace, stream); + } else if (namespace instanceof WindowAndTriggerNamespace) { + stream.write(NON_GLOBAL_NAMESPACE_BYTE); + encodeWindowAndTriggerNamespace( + (WindowAndTriggerNamespace<? extends BoundedWindow>) namespace, stream); + } else { + throw new IllegalStateException("Unsupported namespace type: " + namespace.getClass()); + } + } + + private StateNamespace decodeNameSpace( + InputStream stream, Coder<? extends BoundedWindow> windowCoder) throws IOException { + int firstByte = stream.read(); + switch (firstByte) { + case GLOBAL_NAMESPACE_BYTE: // GlobalNamespace Review Comment: ditto ########## runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/windmill/state/WindmillTagEncodingV2.java: ########## @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.dataflow.worker.windmill.state; + +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; + +import java.io.IOException; +import java.io.InputStream; +import javax.annotation.concurrent.ThreadSafe; +import org.apache.beam.runners.core.StateNamespace; +import org.apache.beam.runners.core.StateNamespaces; +import org.apache.beam.runners.core.StateNamespaces.GlobalNamespace; +import org.apache.beam.runners.core.StateNamespaces.WindowAndTriggerNamespace; +import org.apache.beam.runners.core.StateNamespaces.WindowNamespace; +import org.apache.beam.runners.core.StateTag; +import org.apache.beam.runners.core.StateTags; +import org.apache.beam.runners.core.TimerInternals.TimerData; +import org.apache.beam.runners.dataflow.worker.WindmillNamespacePrefix; +import org.apache.beam.runners.dataflow.worker.WindmillTimeUtils; +import org.apache.beam.runners.dataflow.worker.util.ThreadLocalByteStringOutputStream; +import org.apache.beam.runners.dataflow.worker.util.ThreadLocalByteStringOutputStream.StreamHandle; +import org.apache.beam.runners.dataflow.worker.util.common.worker.InternedByteString; +import org.apache.beam.runners.dataflow.worker.windmill.Windmill.Timer; +import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.coders.BigEndianIntegerCoder; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.InstantCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow.IntervalWindowCoder; +import org.apache.beam.sdk.util.ByteStringOutputStream; +import org.apache.beam.vendor.grpc.v1p69p0.com.google.protobuf.ByteString; +import org.joda.time.Instant; + +/** Encodes and decodes StateTags and TimerTags from and to windmill bytes */ Review Comment: add a comment about ordering guarantees for tags -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
