scwhittle commented on code in PR #36742:
URL: https://github.com/apache/beam/pull/36742#discussion_r2517706245


##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/ThreadLocalByteStringOutputStream.java:
##########
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.dataflow.worker.util;
+
+import java.lang.ref.SoftReference;
+import javax.annotation.concurrent.ThreadSafe;
+import org.apache.beam.sdk.annotations.Internal;
+import org.apache.beam.sdk.util.ByteStringOutputStream;
+import org.apache.beam.sdk.util.Preconditions;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+@Internal
+@ThreadSafe
+/*
+ * A utility class for caching a thread-local {@link ByteStringOutputStream}.

Review Comment:
   How about adding a usage example here? Using the class is more 
straightforward than the implementation makes it appear.



##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/ThreadLocalByteStringOutputStream.java:
##########
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.dataflow.worker.util;
+
+import java.lang.ref.SoftReference;
+import javax.annotation.concurrent.ThreadSafe;
+import org.apache.beam.sdk.annotations.Internal;
+import org.apache.beam.sdk.util.ByteStringOutputStream;
+import org.apache.beam.sdk.util.Preconditions;
+import org.checkerframework.checker.nullness.qual.Nullable;
+
+@Internal
+@ThreadSafe
+/*
+ * A utility class for caching a thread-local {@link ByteStringOutputStream}.
+ */
+public class ThreadLocalByteStringOutputStream {
+
+  private static final ThreadLocal<@Nullable SoftRefHolder> 
threadLocalSoftRefHolder =
+      ThreadLocal.withInitial(SoftRefHolder::new);
+
+  // Private constructor to prevent instantiations from outside.
+  private ThreadLocalByteStringOutputStream() {}
+
+  /** @return An AutoClosable StreamHandle that holds a cached 
ByteStringOutputStream. */
+  public static StreamHandle acquire() {
+    RefHolder refHolder = getRefHolderFromThreadLocal();
+    if (refHolder.inUse) {
+      // Stream is already in use, create a new uncached one
+      return new StreamHandle();
+    }
+    refHolder.inUse = true;
+    return Preconditions.checkArgumentNotNull(
+        refHolder.streamHandle); // inUse will be unset when streamHandle 
closes.
+  }
+
+  /**
+   * Handle to a thread-local {@link ByteStringOutputStream}. If the thread 
local stream is already
+   * in use, a new one is used. The streams are cached and reused across 
calls. Users should not
+   * keep a reference to the stream after closing the StreamHandle.
+   */
+  public static class StreamHandle implements AutoCloseable {
+
+    // When Nonnull the StreamHandle is from the threadlocal and needs to be
+    // marked as not in use, in close.
+    private final @Nullable RefHolder refHolder;
+    private final ByteStringOutputStream stream;
+
+    private StreamHandle() {
+      this.refHolder = null;
+      this.stream = new ByteStringOutputStream();
+    }
+
+    private StreamHandle(RefHolder refHolder) {
+      this.refHolder = refHolder;
+      this.stream = refHolder.stream;
+    }
+
+    /**
+     * Returns the underlying cached ByteStringOutputStream. Callers should 
not keep a reference to
+     * the stream after closing the StreamHandle.
+     */
+    public ByteStringOutputStream stream() {
+      return stream;
+    }
+
+    @Override
+    public void close() {
+      stream.reset();
+      if (refHolder != null) {
+        refHolder.inUse = false;
+      }
+    }
+  }
+
+  private static class SoftRefHolder {
+
+    private @Nullable SoftReference<RefHolder> softReference;
+  }
+
+  private static class RefHolder {
+
+    public ByteStringOutputStream stream = new ByteStringOutputStream();
+
+    // Boolean is true when the thread local stream is already in use by the 
current thread.
+    // Used to avoid reusing the same stream from nested calls if any.
+    public boolean inUse = false;
+
+    public @Nullable StreamHandle streamHandle = null;
+
+    public static RefHolder create() {
+      RefHolder refHolder = new RefHolder();
+      refHolder.streamHandle = new StreamHandle(refHolder);
+      return refHolder;
+    }
+  }
+
+  private static RefHolder getRefHolderFromThreadLocal() {
+    // softRefHolder is only set by Threadlocal initializer and should not be 
null
+    SoftRefHolder softRefHolder =
+        Preconditions.checkArgumentNotNull(threadLocalSoftRefHolder.get());
+    RefHolder refHolder;
+    if (softRefHolder.softReference != null && 
softRefHolder.softReference.get() != null) {

Review Comment:
   How about assigning the result of softRefHolder.softReference.get() to an 
intermediate variable instead of calling it twice?
   It seems like the precondition could fail if the referent is gc'd between 
the two calls, and calling get() twice also incurs extra synchronization overhead.
   
   ```
   @Nullable RefHolder refHolder;
   if (softRefHolder.softReference != null) {
     refHolder = softRefHolder.softReference.get();
   }
   if (refHolder == null) {
     ...
   }
   return refHolder;
   ```



##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/ThreadLocalByteStringOutputStream.java:
##########
@@ -74,38 +82,39 @@ public ByteStringOutputStream stream() {
     @Override
     public void close() {
       stream.reset();
-      if (releaseThreadLocal) {
+      if (refHolder != null) {
         refHolder.inUse = false;
       }
     }
   }
 
   private static class RefHolder {
 
-    public SoftReference<@Nullable ByteStringOutputStream> streamRef =
-        new SoftReference<>(new ByteStringOutputStream());
+    public ByteStringOutputStream stream = new ByteStringOutputStream();

Review Comment:
   I don't see anything using this other than the StreamHandle constructor, and 
since we only create a single StreamHandle for a given RefHolder, there doesn't 
seem to be any benefit to creating it here.
   
   In that case RefHolder is just a boolean and the StreamHandle, so it seems 
like we could merge StreamHandle and RefHolder.
   
   ```
     public static StreamHandle acquire() {
       StreamHandle streamHandle = getStreamHandleFromThreadLocal();
       if (streamHandle.inUse) {
         // Stream is already in use, create a new uncached one
         return new StreamHandle();
       }
       streamHandle.inUse = true;
       return streamHandle; // inUse will be unset when streamHandle closes.
     }
   
     private static class SoftRefHolder {
       private @Nullable SoftReference<StreamHandle> softReference;
     }
     public static class StreamHandle implements AutoClosable {
       boolean inUse = true;
       final ByteStringOutputStream stream = new ByteStringOutputStream();
   
       public ByteStringOutputStream stream() {
         return stream;
       }
   
       @Override
       public void close() {
         stream.reset();
         inUse = false;
       }
     }
   ```



##########
runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/util/ThreadLocalByteStringOutputStream.java:
##########
@@ -74,38 +82,39 @@ public ByteStringOutputStream stream() {
     @Override
     public void close() {
       stream.reset();
-      if (releaseThreadLocal) {
+      if (refHolder != null) {
         refHolder.inUse = false;
       }
     }
   }
 
   private static class RefHolder {
 
-    public SoftReference<@Nullable ByteStringOutputStream> streamRef =
-        new SoftReference<>(new ByteStringOutputStream());
+    public ByteStringOutputStream stream = new ByteStringOutputStream();
 
     // Boolean is true when the thread local stream is already in use by the 
current thread.
     // Used to avoid reusing the same stream from nested calls if any.
     public boolean inUse = false;
-  }
 
-  private static RefHolder getRefHolderFromThreadLocal() {
-    @Nullable RefHolder refHolder = threadLocalRefHolder.get();
-    if (refHolder == null) {
-      refHolder = new RefHolder();
-      threadLocalRefHolder.set(refHolder);
+    public @Nullable StreamHandle streamHandle = null;
+
+    public static RefHolder create() {
+      RefHolder refHolder = new RefHolder();
+      refHolder.streamHandle = new StreamHandle(refHolder);
+      return refHolder;
     }
-    return refHolder;
   }
 
-  private static ByteStringOutputStream getByteStringOutputStream(RefHolder 
refHolder) {
-    @Nullable
-    ByteStringOutputStream stream = refHolder.streamRef == null ? null : 
refHolder.streamRef.get();
-    if (stream == null) {
-      stream = new ByteStringOutputStream();
-      refHolder.streamRef = new SoftReference<>(stream);
+  private static RefHolder getRefHolderFromThreadLocal() {
+    @Nullable SoftReference<RefHolder> refHolderSoftReference = 
threadLocalRefHolder.get();
+    @Nullable RefHolder refHolder = null;
+    if (refHolderSoftReference != null) {
+      refHolder = refHolderSoftReference.get();
     }
-    return stream;
+    if (refHolderSoftReference == null || refHolder == null) {
+      refHolder = RefHolder.create();
+      threadLocalRefHolder.set(new SoftReference<>(refHolder));

Review Comment:
   Sorry, I didn't mean SoftReference<ByteStringOutputStream> but 
SoftReference<StreamHandle>. Since we've already given the StreamHandle to the 
user in acquire(), using the stream() method on it would be consistent. See the 
suggestion in the other comment.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to