scwhittle commented on a change in pull request #13862:
URL: https://github.com/apache/beam/pull/13862#discussion_r581816892



##########
File path: runners/google-cloud-dataflow-java/worker/src/main/java/org/apache/beam/runners/dataflow/worker/WindmillStateCache.java
##########
@@ -50,55 +50,68 @@
  * StreamingDataflowWorker} ensures that a single computation * processing key is executing on one
  * thread at a time, so this is safe.
  */
-@SuppressWarnings({
-  "nullness" // TODO(https://issues.apache.org/jira/browse/BEAM-10402)
-})
 public class WindmillStateCache implements StatusDataProvider {
   // Convert Megabytes to bytes
   private static final long MEGABYTES = 1024 * 1024;
   // Estimate of overhead per StateId.
-  private static final int PER_STATE_ID_OVERHEAD = 20;
+  private static final long PER_STATE_ID_OVERHEAD = 28;
   // Initial size of hash tables per entry.
   private static final int INITIAL_HASH_MAP_CAPACITY = 4;
   // Overhead of each hash map entry.
   private static final int HASH_MAP_ENTRY_OVERHEAD = 16;
-  // Overhead of each cache entry.  Three longs, plus a hash table.
+  // Overhead of each StateCacheEntry.  One long, plus a hash table.
   private static final int PER_CACHE_ENTRY_OVERHEAD =
-      24 + HASH_MAP_ENTRY_OVERHEAD * INITIAL_HASH_MAP_CAPACITY;
+      8 + HASH_MAP_ENTRY_OVERHEAD * INITIAL_HASH_MAP_CAPACITY;
 
   private Cache<StateId, StateCacheEntry> stateCache;
-  private HashMultimap<WindmillComputationKey, StateId> keyIndex =
-      HashMultimap.<WindmillComputationKey, StateId>create();
-  private long displayedWeight = 0; // Only used for status pages and unit tests.
+  // Contains the current valid ForKey object. Entries in the cache are keyed by ForKey with pointer
+  // equality so entries may be invalidated by creating a new key object, rendering the previous
+  // entries inaccessible. They will be evicted through normal cache operation.
+  private ConcurrentMap<WindmillComputationKey, ForKey> keyIndex =
+      new MapMaker().weakValues().concurrencyLevel(4).makeMap();
   private long workerCacheBytes; // Copy workerCacheMb and convert to bytes.
 
-  public WindmillStateCache(Integer workerCacheMb) {
+  public WindmillStateCache(long workerCacheMb) {
     final Weigher<Weighted, Weighted> weigher = Weighers.weightedKeysAndValues();
     workerCacheBytes = workerCacheMb * MEGABYTES;
     stateCache =
         CacheBuilder.newBuilder()
             .maximumWeight(workerCacheBytes)
             .recordStats()
             .weigher(weigher)
-            .removalListener(
-                removal -> {
-                  if (removal.getCause() != RemovalCause.REPLACED) {
-                    synchronized (this) {
-                      StateId id = (StateId) removal.getKey();
-                      if (removal.getCause() != RemovalCause.EXPLICIT) {
-                        // When we invalidate a key explicitly, we'll also update the keyIndex, so
-                        // no need to do it here.
-                        keyIndex.remove(id.getWindmillComputationKey(), id);
-                      }
-                      displayedWeight -= weigher.weigh(id, removal.getValue());
-                    }
-                  }
-                })
+            .concurrencyLevel(4)
             .build();
   }
 
+  private static class EntryStats {
+    long entries;
+    long idWeight;
+    long entryWeight;
+    long entryValues;
+    long maxEntryValues;
+  }
+
+  private EntryStats calculateEntryStats() {
+    class CacheConsumer implements BiConsumer<StateId, StateCacheEntry> {
+      public EntryStats stats = new EntryStats();
+
+      @Override
+      public void accept(StateId stateId, StateCacheEntry stateCacheEntry) {
+        stats.entries++;
+        stats.idWeight += stateId.getWeight();
+        stats.entryWeight += stateCacheEntry.getWeight();
+        stats.entryValues += stateCacheEntry.values.size();
+        stats.maxEntryValues = Math.max(stats.maxEntryValues, stateCacheEntry.values.size());
+      }
+    }
+    CacheConsumer consumer = new CacheConsumer();

Review comment:
       Done, thanks




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to