Github user StefanRRichter commented on a diff in the pull request:

    https://github.com/apache/flink/pull/6228#discussion_r199814790
  
    --- Diff: 
flink-runtime/src/main/java/org/apache/flink/runtime/state/heap/HeapPriorityQueueSet.java
 ---
    @@ -0,0 +1,186 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + * http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.flink.runtime.state.heap;
    +
    +import org.apache.flink.annotation.VisibleForTesting;
    +import org.apache.flink.runtime.state.KeyExtractorFunction;
    +import org.apache.flink.runtime.state.KeyGroupRange;
    +import org.apache.flink.runtime.state.KeyGroupRangeAssignment;
    +
    +import javax.annotation.Nonnegative;
    +import javax.annotation.Nonnull;
    +import javax.annotation.Nullable;
    +
    +import java.util.ArrayList;
    +import java.util.Collections;
    +import java.util.Comparator;
    +import java.util.HashMap;
    +import java.util.List;
    +import java.util.Set;
    +
    +import static org.apache.flink.util.Preconditions.checkArgument;
    +
    +/**
    + * A heap-based priority queue with set semantics, based on {@link HeapPriorityQueue}. The heap is backed by
    + * hash maps for fast contains checks (de-duplication) and deletes. Object identification is based on the
    + * {@link Object#equals(Object)} method of the contained elements.
    + *
    + * <p>Possible future improvements:
    + * <ul>
    + *  <li>We could also implement shrinking for the heap and the 
deduplication set.</li>
    + *  <li>We could replace the deduplication maps with more efficient custom implementations. In particular,
    + * a hash set would be enough if it could return the existing element on an unsuccessful add, etc.</li>
    + * </ul>
    + *
    + * @param <T> type of the contained elements.
    + */
    +public class HeapPriorityQueueSet<T extends HeapPriorityQueueElement> 
extends HeapPriorityQueue<T> {
    +
    +   /**
    +    * Function to extract the key from contained elements.
    +    */
    +   private final KeyExtractorFunction<T> keyExtractor;
    +
    +   /**
    +    * This array contains one hash map per key-group. The maps are used as sets, for fast de-duplication
    +    * and deletes of elements.
    +    */
    +   private final HashMap<T, T>[] deduplicationMapsByKeyGroup;
    +
    +   /**
    +    * The key-group range of elements that are managed by this queue.
    +    */
    +   private final KeyGroupRange keyGroupRange;
    +
    +   /**
    +    * The total number of key-groups of the job.
    +    */
    +   private final int totalNumberOfKeyGroups;
    +
    +   /**
    +    * Constructs an empty {@link HeapPriorityQueueSet}, forwarding the comparator and the requested
    +    * minimum capacity to the superclass.
    +    *
    +    * <p>One de-duplication map is allocated for every key-group in the given range; each map is pre-sized
    +    * to {@code 1 + minimumCapacity / keyGroupRange.getNumberOfKeyGroups()}.
    +    *
    +    * @param elementComparator comparator for the contained elements.
    +    * @param keyExtractor function to extract a key from the contained elements.
    +    * @param minimumCapacity the minimum and initial capacity of this priority queue.
    +    * @param keyGroupRange the key-group range of the elements in this set.
    +    * @param totalNumberOfKeyGroups the total number of key-groups of the job.
    +    */
    +   @SuppressWarnings("unchecked")
    +   public HeapPriorityQueueSet(
    +           @Nonnull Comparator<T> elementComparator,
    +           @Nonnull KeyExtractorFunction<T> keyExtractor,
    +           @Nonnegative int minimumCapacity,
    +           @Nonnull KeyGroupRange keyGroupRange,
    +           @Nonnegative int totalNumberOfKeyGroups) {
    +
    +           super(elementComparator, minimumCapacity);
    +
    +           this.keyExtractor = keyExtractor;
    +           this.keyGroupRange = keyGroupRange;
    +           this.totalNumberOfKeyGroups = totalNumberOfKeyGroups;
    +
    +           final int localKeyGroupCount = keyGroupRange.getNumberOfKeyGroups();
    +           // NOTE(review): this divides by the number of key-groups in the range, so it presumably relies on
    +           // callers never passing an empty range -- confirm against KeyGroupRange.
    +           final int initialMapCapacity = minimumCapacity / localKeyGroupCount + 1;
    +
    +           // Generic arrays cannot be instantiated directly, hence the raw array type and the
    +           // unchecked-warning suppression on this constructor.
    +           final HashMap<T, T>[] mapsPerKeyGroup = new HashMap[localKeyGroupCount];
    +           for (int keyGroupIdx = 0; keyGroupIdx < mapsPerKeyGroup.length; keyGroupIdx++) {
    +                   mapsPerKeyGroup[keyGroupIdx] = new HashMap<>(initialMapCapacity);
    +           }
    +           this.deduplicationMapsByKeyGroup = mapsPerKeyGroup;
    +   }
    +
    +   /**
    +    * Polls the superclass queue and, if an element was returned, also removes it from the de-duplication
    +    * map obtained for that element.
    +    *
    +    * @return the instance that was stored in the de-duplication map for the polled element, or
    +    * <code>null</code> if the superclass returned no element.
    +    */
    +   @Override
    +   @Nullable
    +   public T poll() {
    +           final T polled = super.poll();
    +           if (polled == null) {
    +                   return null;
    +           }
    +           // keep the de-duplication map in sync with the heap
    +           return getDedupMapForElement(polled).remove(polled);
    +   }
    +
    +   /**
    +    * Inserts the element into the queue. To maintain set semantics, and in contrast to the superclass, the
    +    * element is only inserted if the de-duplication map does not already contain an equal element.
    +    *
    +    * @return <code>true</code> if the operation changed the head element or if it is unclear if the head
    +    * element changed. Only returns <code>false</code> if the head element was not changed by this
    +    * operation; in particular, <code>false</code> is returned if an equal element was already contained.
    +    */
    +   @Override
    +   public boolean add(@Nonnull T element) {
    +           final T alreadyPresent = getDedupMapForElement(element).putIfAbsent(element, element);
    +           if (alreadyPresent != null) {
    +                   // duplicate: neither the map nor the heap is modified
    +                   return false;
    +           }
    +           return super.add(element);
    +   }
    +
    +   /**
    +    * Removes the element from the queue. To maintain set semantics, and in contrast to the superclass, the
    +    * element to remove is looked up in the de-duplication map, and the instance stored there is the one
    +    * that is removed from the heap.
    +    *
    +    * @return <code>true</code> if the operation changed the head element or if it is unclear if the head
    +    * element changed. Only returns <code>false</code> if the head element was not changed by this
    +    * operation; in particular, <code>false</code> is returned if no equal element was contained.
    +    */
    +   @Override
    +   public boolean remove(@Nonnull T elementToRemove) {
    +           final T stored = getDedupMapForElement(elementToRemove).remove(elementToRemove);
    +           if (stored == null) {
    +                   // nothing equal was contained, so the heap is left untouched
    +                   return false;
    +           }
    +           // pass the stored instance, not the argument, to the superclass
    +           return super.remove(stored);
    +   }
    +
    +   @Override
    +   public void clear() {
    +           super.clear();
    +           for (HashMap<?, ?> elementHashMap :
    +                   deduplicationMapsByKeyGroup) {
    --- End diff --
    
    👍 


---

Reply via email to