scwhittle commented on code in PR #36742: URL: https://github.com/apache/beam/pull/36742#discussion_r2517706245
########## runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/ThreadLocalByteStringOutputStream.java: ########## @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.dataflow.worker.util; + +import java.lang.ref.SoftReference; +import javax.annotation.concurrent.ThreadSafe; +import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.util.ByteStringOutputStream; +import org.apache.beam.sdk.util.Preconditions; +import org.checkerframework.checker.nullness.qual.Nullable; + +@Internal +@ThreadSafe +/* + * A utility class for caching a thread-local {@link ByteStringOutputStream}. Review Comment: How about adding a usage example here? Using the class is more straightforward than the implementation makes it appear. ########## runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/ThreadLocalByteStringOutputStream.java: ########## @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.dataflow.worker.util; + +import java.lang.ref.SoftReference; +import javax.annotation.concurrent.ThreadSafe; +import org.apache.beam.sdk.annotations.Internal; +import org.apache.beam.sdk.util.ByteStringOutputStream; +import org.apache.beam.sdk.util.Preconditions; +import org.checkerframework.checker.nullness.qual.Nullable; + +@Internal +@ThreadSafe +/* + * A utility class for caching a thread-local {@link ByteStringOutputStream}. + */ +public class ThreadLocalByteStringOutputStream { + + private static final ThreadLocal<@Nullable SoftRefHolder> threadLocalSoftRefHolder = + ThreadLocal.withInitial(SoftRefHolder::new); + + // Private constructor to prevent instantiations from outside. + private ThreadLocalByteStringOutputStream() {} + + /** @return An AutoClosable StreamHandle that holds a cached ByteStringOutputStream. */ + public static StreamHandle acquire() { + RefHolder refHolder = getRefHolderFromThreadLocal(); + if (refHolder.inUse) { + // Stream is already in use, create a new uncached one + return new StreamHandle(); + } + refHolder.inUse = true; + return Preconditions.checkArgumentNotNull( + refHolder.streamHandle); // inUse will be unset when streamHandle closes. + } + + /** + * Handle to a thread-local {@link ByteStringOutputStream}. If the thread local stream is already + * in use, a new one is used. 
The streams are cached and reused across calls. Users should not + * keep a reference to the stream after closing the StreamHandle. + */ + public static class StreamHandle implements AutoCloseable { + + // When Nonnull the StreamHandle is from the threadlocal and needs to be + // marked as not in use, in close. + private final @Nullable RefHolder refHolder; + private final ByteStringOutputStream stream; + + private StreamHandle() { + this.refHolder = null; + this.stream = new ByteStringOutputStream(); + } + + private StreamHandle(RefHolder refHolder) { + this.refHolder = refHolder; + this.stream = refHolder.stream; + } + + /** + * Returns the underlying cached ByteStringOutputStream. Callers should not keep a reference to + * the stream after closing the StreamHandle. + */ + public ByteStringOutputStream stream() { + return stream; + } + + @Override + public void close() { + stream.reset(); + if (refHolder != null) { + refHolder.inUse = false; + } + } + } + + private static class SoftRefHolder { + + private @Nullable SoftReference<RefHolder> softReference; + } + + private static class RefHolder { + + public ByteStringOutputStream stream = new ByteStringOutputStream(); + + // Boolean is true when the thread local stream is already in use by the current thread. + // Used to avoid reusing the same stream from nested calls if any. 
+ public boolean inUse = false; + + public @Nullable StreamHandle streamHandle = null; + + public static RefHolder create() { + RefHolder refHolder = new RefHolder(); + refHolder.streamHandle = new StreamHandle(refHolder); + return refHolder; + } + } + + private static RefHolder getRefHolderFromThreadLocal() { + // softRefHolder is only set by Threadlocal initializer and should not be null + SoftRefHolder softRefHolder = + Preconditions.checkArgumentNotNull(threadLocalSoftRefHolder.get()); + RefHolder refHolder; + if (softRefHolder.softReference != null && softRefHolder.softReference.get() != null) { Review Comment: How about assigning the result of softRefHolder.softReference.get() to an intermediate variable instead of calling it twice? It seems like the precondition could fail if the referent were GC'd between the two calls, and calling get() twice also adds synchronization overhead. ``` @Nullable RefHolder refHolder = null; if (softRefHolder.softReference != null) { refHolder = softRefHolder.softReference.get(); } if (refHolder == null) { ... } return refHolder; ``` ########## runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/ThreadLocalByteStringOutputStream.java: ########## @@ -74,38 +82,39 @@ public ByteStringOutputStream stream() { @Override public void close() { stream.reset(); - if (releaseThreadLocal) { + if (refHolder != null) { refHolder.inUse = false; } } } private static class RefHolder { - public SoftReference<@Nullable ByteStringOutputStream> streamRef = - new SoftReference<>(new ByteStringOutputStream()); + public ByteStringOutputStream stream = new ByteStringOutputStream(); Review Comment: I don't see anything using this other than the StreamHandle constructor, and since we only create a single StreamHandle for a given RefHolder, there doesn't seem to be any benefit to having it created here. In that case RefHolder is just a boolean and the StreamHandle, so it seems like we could merge StreamHandle and RefHolder. 
``` public static StreamHandle acquire() { StreamHandle streamHandle = getStreamHandleFromThreadLocal(); if (streamHandle.inUse) { // Stream is already in use, create a new uncached one return new StreamHandle(); } streamHandle.inUse = true; return streamHandle; // inUse will be unset when streamHandle closes. } private static class SoftRefHolder { private @Nullable SoftReference<StreamHandle> softReference; } public static class StreamHandle implements AutoCloseable { boolean inUse = true; final ByteStringOutputStream stream = new ByteStringOutputStream(); public ByteStringOutputStream stream() { return stream; } @Override public void close() { stream.reset(); inUse = false; } } ``` ########## runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/ThreadLocalByteStringOutputStream.java: ########## @@ -74,38 +82,39 @@ public ByteStringOutputStream stream() { @Override public void close() { stream.reset(); - if (releaseThreadLocal) { + if (refHolder != null) { refHolder.inUse = false; } } } private static class RefHolder { - public SoftReference<@Nullable ByteStringOutputStream> streamRef = - new SoftReference<>(new ByteStringOutputStream()); + public ByteStringOutputStream stream = new ByteStringOutputStream(); // Boolean is true when the thread local stream is already in use by the current thread. // Used to avoid reusing the same stream from nested calls if any. 
public boolean inUse = false; - } - private static RefHolder getRefHolderFromThreadLocal() { - @Nullable RefHolder refHolder = threadLocalRefHolder.get(); - if (refHolder == null) { - refHolder = new RefHolder(); - threadLocalRefHolder.set(refHolder); + public @Nullable StreamHandle streamHandle = null; + + public static RefHolder create() { + RefHolder refHolder = new RefHolder(); + refHolder.streamHandle = new StreamHandle(refHolder); + return refHolder; } - return refHolder; } - private static ByteStringOutputStream getByteStringOutputStream(RefHolder refHolder) { - @Nullable - ByteStringOutputStream stream = refHolder.streamRef == null ? null : refHolder.streamRef.get(); - if (stream == null) { - stream = new ByteStringOutputStream(); - refHolder.streamRef = new SoftReference<>(stream); + private static RefHolder getRefHolderFromThreadLocal() { + @Nullable SoftReference<RefHolder> refHolderSoftReference = threadLocalRefHolder.get(); + @Nullable RefHolder refHolder = null; + if (refHolderSoftReference != null) { + refHolder = refHolderSoftReference.get(); } - return stream; + if (refHolderSoftReference == null || refHolder == null) { + refHolder = RefHolder.create(); + threadLocalRefHolder.set(new SoftReference<>(refHolder)); Review Comment: Sorry, I didn't mean SoftReference<ByteStringOutputStream> but SoftReference<StreamHandle>. Since we've already given the StreamHandle to the user in acquire(), having them go through its stream() method would be consistent. See the suggestion in my other comment. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
