busbey commented on a change in pull request #2985: URL: https://github.com/apache/hadoop/pull/2985#discussion_r632984329
########## File path: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Sets.java ########## @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.classification.InterfaceAudience; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Static utility methods pertaining to {@link Set} instances. + * This class is Hadoop's internal use alternative to Guava's Sets + * utility class. + */ [email protected] +public final class Sets { Review comment: It looks like some of the javadocs are directly copied from Guava's `Sets`? Please include a note in the Javadoc here that the method api/javadocs are from Guava's `Sets` class (pref w/a specific guava release noted). Hopefully that will make references to use of "ImmutableSet#of" in javadocs clear enough. Alternatively rewrite any javadocs originally from guava. If any of the implementations are copied from Guava's `Sets` class also indicate that. ########## File path: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Sets.java ########## @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.classification.InterfaceAudience; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Static utility methods pertaining to {@link Set} instances. + * This class is Hadoop's internal use alternative to Guava's Sets + * utility class. + */ [email protected] +public final class Sets { + + private Sets() { + // empty + } + + /** + * Creates a <i>mutable</i>, initially empty {@code HashSet} instance. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSet#of() + * instead. If {@code E} is an {@link Enum} type, use {@link EnumSet#noneOf} + * instead. Otherwise, strongly consider using a {@code LinkedHashSet} + * instead, at the cost of increased memory footprint, to get + * deterministic iteration behavior. + */ + public static <E> HashSet<E> newHashSet() { + return new HashSet<E>(); + } + + /** + * Creates a <i>mutable</i>, empty {@code TreeSet} instance sorted by the + * natural sort ordering of its elements. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSortedSet#of() + * instead. + * + * @return a new, empty {@code TreeSet} + */ + public static <E extends Comparable> TreeSet<E> newTreeSet() { + return new TreeSet<E>(); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance initially containing + * the given elements. + * + * <p><b>Note:</b> if elements are non-null and won't be added or removed + * after this point, use ImmutableSet#of() or ImmutableSet#copyOf(Object[]) + * instead. If {@code E} is an {@link Enum} type, use + * {@link EnumSet#of(Enum, Enum[])} instead. Otherwise, strongly consider + * using a {@code LinkedHashSet} instead, at the cost of increased memory + * footprint, to get deterministic iteration behavior. + * + * <p>This method is just a small convenience, either for + * {@code newHashSet(}{@link Arrays#asList}{@code (...))}, or for creating an + * empty set then calling {@link Collections#addAll}. + */ + @SafeVarargs + public static <E> HashSet<E> newHashSet(E... elements) { + HashSet<E> set = newHashSetWithExpectedSize(elements.length); + Collections.addAll(set, elements); + return set; + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set then calling + * {@link Collection#addAll} or Iterables#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterable) instead. (Or, change + * {@code elements} to be a FluentIterable and call {@code elements.toSet()}.) + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, use + * newEnumSet(Iterable, Class) instead. + */ + public static <E> HashSet<E> newHashSet(Iterable<? extends E> elements) { + return (elements instanceof Collection) + ? new HashSet<E>(cast(elements)) + : newHashSet(elements.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code TreeSet} instance containing the given + * elements sorted by their natural ordering. + * + * <p><b>Note:</b> if mutability is not required, use + * ImmutableSortedSet#copyOf(Iterable) instead. + * + * <p><b>Note:</b> If {@code elements} is a {@code SortedSet} with an + * explicit comparator, this method has different behavior than + * {@link TreeSet#TreeSet(SortedSet)}, which returns a {@code TreeSet} + * with that comparator. + * + * <p><b>Note for Java 7 and later:</b> this method is now unnecessary and + * should be treated as deprecated. Instead, use the {@code TreeSet} + * constructor directly, taking advantage of the new + * <a href="http://goo.gl/iz2Wi">"diamond" syntax</a>. + * + * <p>This method is just a small convenience for creating an empty set and + * then calling Iterables#addAll. This method is not very useful and will + * likely be deprecated in the future. + * + * @param elements the elements that the set should contain + * @return a new {@code TreeSet} containing those elements (minus duplicates) + */ + public static <E extends Comparable> TreeSet<E> newTreeSet( + Iterable<? extends E> elements) { + TreeSet<E> set = newTreeSet(); + addAll(set, elements); + return set; + } + + private static <E extends Comparable> boolean addAll(TreeSet<E> addTo, + Iterable<? extends E> elementsToAdd) { + if (elementsToAdd instanceof Collection) { + Collection<? extends E> c = cast(elementsToAdd); + return addTo.addAll(c); + } + if (elementsToAdd == null) { + throw new NullPointerException(); + } + return addAll(addTo, elementsToAdd.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set and then + * calling Iterators#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterator) instead. + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, you should create + * an {@link EnumSet} instead. + * + * <p>Overall, this method is not very useful and will likely be deprecated + * in the future. + */ + public static <E> HashSet<E> newHashSet(Iterator<? extends E> elements) { + HashSet<E> set = newHashSet(); + addAll(set, elements); + return set; + } + + /** + * Returns a new hash set using the smallest initial table size that can hold + * {@code expectedSize} elements without resizing. Note that this is not what + * {@link HashSet#HashSet(int)} does, but it is what most users want and + * expect it to do. + * + * <p>This behavior can't be broadly guaranteed, but has been tested with + * OpenJDK 1.7 and 1.8. + * + * @param expectedSize the number of elements you expect to add to the + * returned set + * @return a new, empty hash set with enough capacity to hold + * {@code expectedSize} elements without resizing + * @throws IllegalArgumentException if {@code expectedSize} is negative + */ + public static <E> HashSet<E> newHashSetWithExpectedSize(int expectedSize) { + return new HashSet<E>(expectedSize); + } + + private static <E> Collection<E> cast(Iterable<E> iterable) { + return (Collection<E>) iterable; + } + + private static <E> boolean addAll(Collection<E> addTo, + Iterator<? extends E> iterator) { + if (addTo == null) { + throw new NullPointerException(); + } + if (iterator == null) { + throw new NullPointerException(); + } + boolean wasModified = false; + while (iterator.hasNext()) { + wasModified |= addTo.add(iterator.next()); + } + return wasModified; + } + + /** + * Returns the intersection of two sets as an unmodifiable set. + */ + public static <E> Set<E> intersection(final Set<E> set1, + final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new HashSet<>(set1); + newSet.retainAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the symmetric difference of two sets as an unmodifiable set. + * Should be used with HashSet only. + */ + public static <E> Set<E> union( + final Set<E> set1, final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new HashSet<>(set1); + newSet.addAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the difference of two sets as an unmodifiable set. + * Removes all element from set1 that are contained in set2. + * This method is used to find difference for HashSets. For TreeSets with + * strict order requirement, recommended method is + * {@link #differenceInTreeSets(Set, Set)}. + */ + public static <E> Set<E> difference( + final Set<E> set1, final Set<E> set2) { Review comment: This doesn't remove stuff from set1, so recommend being clear in the phrasing. e.g. ``` The difference is defined as elements that are in set1 but not in set 2. ``` ########## File path: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Sets.java ########## @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.classification.InterfaceAudience; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Static utility methods pertaining to {@link Set} instances. + * This class is Hadoop's internal use alternative to Guava's Sets + * utility class. + */ [email protected] +public final class Sets { + + private Sets() { + // empty + } + + /** + * Creates a <i>mutable</i>, initially empty {@code HashSet} instance. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSet#of() + * instead. If {@code E} is an {@link Enum} type, use {@link EnumSet#noneOf} + * instead. Otherwise, strongly consider using a {@code LinkedHashSet} + * instead, at the cost of increased memory footprint, to get + * deterministic iteration behavior. + */ + public static <E> HashSet<E> newHashSet() { + return new HashSet<E>(); + } + + /** + * Creates a <i>mutable</i>, empty {@code TreeSet} instance sorted by the + * natural sort ordering of its elements. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSortedSet#of() + * instead. + * + * @return a new, empty {@code TreeSet} + */ + public static <E extends Comparable> TreeSet<E> newTreeSet() { + return new TreeSet<E>(); + } Review comment: for these two "make an empty set" implementations, what's the advantage of having an implementation here rather than following the advice from Guava for jdk7+ to use the constructor directly? ########## File path: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Sets.java ########## @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.classification.InterfaceAudience; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Static utility methods pertaining to {@link Set} instances. + * This class is Hadoop's internal use alternative to Guava's Sets + * utility class. + */ [email protected] +public final class Sets { + + private Sets() { + // empty + } + + /** + * Creates a <i>mutable</i>, initially empty {@code HashSet} instance. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSet#of() + * instead. If {@code E} is an {@link Enum} type, use {@link EnumSet#noneOf} + * instead. Otherwise, strongly consider using a {@code LinkedHashSet} + * instead, at the cost of increased memory footprint, to get + * deterministic iteration behavior. + */ + public static <E> HashSet<E> newHashSet() { + return new HashSet<E>(); + } + + /** + * Creates a <i>mutable</i>, empty {@code TreeSet} instance sorted by the + * natural sort ordering of its elements. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSortedSet#of() + * instead. + * + * @return a new, empty {@code TreeSet} + */ + public static <E extends Comparable> TreeSet<E> newTreeSet() { + return new TreeSet<E>(); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance initially containing + * the given elements. + * + * <p><b>Note:</b> if elements are non-null and won't be added or removed + * after this point, use ImmutableSet#of() or ImmutableSet#copyOf(Object[]) + * instead. If {@code E} is an {@link Enum} type, use + * {@link EnumSet#of(Enum, Enum[])} instead. Otherwise, strongly consider + * using a {@code LinkedHashSet} instead, at the cost of increased memory + * footprint, to get deterministic iteration behavior. + * + * <p>This method is just a small convenience, either for + * {@code newHashSet(}{@link Arrays#asList}{@code (...))}, or for creating an + * empty set then calling {@link Collections#addAll}. + */ + @SafeVarargs + public static <E> HashSet<E> newHashSet(E... elements) { + HashSet<E> set = newHashSetWithExpectedSize(elements.length); + Collections.addAll(set, elements); + return set; + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set then calling + * {@link Collection#addAll} or Iterables#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterable) instead. (Or, change + * {@code elements} to be a FluentIterable and call {@code elements.toSet()}.) + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, use + * newEnumSet(Iterable, Class) instead. + */ + public static <E> HashSet<E> newHashSet(Iterable<? extends E> elements) { + return (elements instanceof Collection) + ? new HashSet<E>(cast(elements)) + : newHashSet(elements.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code TreeSet} instance containing the given + * elements sorted by their natural ordering. + * + * <p><b>Note:</b> if mutability is not required, use + * ImmutableSortedSet#copyOf(Iterable) instead. + * + * <p><b>Note:</b> If {@code elements} is a {@code SortedSet} with an + * explicit comparator, this method has different behavior than + * {@link TreeSet#TreeSet(SortedSet)}, which returns a {@code TreeSet} + * with that comparator. + * + * <p><b>Note for Java 7 and later:</b> this method is now unnecessary and + * should be treated as deprecated. Instead, use the {@code TreeSet} + * constructor directly, taking advantage of the new + * <a href="http://goo.gl/iz2Wi">"diamond" syntax</a>. + * + * <p>This method is just a small convenience for creating an empty set and + * then calling Iterables#addAll. This method is not very useful and will + * likely be deprecated in the future. + * + * @param elements the elements that the set should contain + * @return a new {@code TreeSet} containing those elements (minus duplicates) + */ + public static <E extends Comparable> TreeSet<E> newTreeSet( + Iterable<? extends E> elements) { + TreeSet<E> set = newTreeSet(); + addAll(set, elements); + return set; + } + + private static <E extends Comparable> boolean addAll(TreeSet<E> addTo, + Iterable<? extends E> elementsToAdd) { + if (elementsToAdd instanceof Collection) { + Collection<? extends E> c = cast(elementsToAdd); + return addTo.addAll(c); + } + if (elementsToAdd == null) { + throw new NullPointerException(); + } + return addAll(addTo, elementsToAdd.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set and then + * calling Iterators#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterator) instead. + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, you should create + * an {@link EnumSet} instead. + * + * <p>Overall, this method is not very useful and will likely be deprecated + * in the future. + */ + public static <E> HashSet<E> newHashSet(Iterator<? extends E> elements) { + HashSet<E> set = newHashSet(); + addAll(set, elements); + return set; + } + + /** + * Returns a new hash set using the smallest initial table size that can hold + * {@code expectedSize} elements without resizing. Note that this is not what + * {@link HashSet#HashSet(int)} does, but it is what most users want and + * expect it to do. + * + * <p>This behavior can't be broadly guaranteed, but has been tested with + * OpenJDK 1.7 and 1.8. + * + * @param expectedSize the number of elements you expect to add to the + * returned set + * @return a new, empty hash set with enough capacity to hold + * {@code expectedSize} elements without resizing + * @throws IllegalArgumentException if {@code expectedSize} is negative + */ + public static <E> HashSet<E> newHashSetWithExpectedSize(int expectedSize) { + return new HashSet<E>(expectedSize); + } + + private static <E> Collection<E> cast(Iterable<E> iterable) { + return (Collection<E>) iterable; + } + + private static <E> boolean addAll(Collection<E> addTo, + Iterator<? extends E> iterator) { + if (addTo == null) { + throw new NullPointerException(); + } + if (iterator == null) { + throw new NullPointerException(); + } + boolean wasModified = false; + while (iterator.hasNext()) { + wasModified |= addTo.add(iterator.next()); + } + return wasModified; + } + + /** + * Returns the intersection of two sets as an unmodifiable set. + */ + public static <E> Set<E> intersection(final Set<E> set1, + final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new HashSet<>(set1); + newSet.retainAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the symmetric difference of two sets as an unmodifiable set. + * Should be used with HashSet only. + */ + public static <E> Set<E> union( + final Set<E> set1, final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new HashSet<>(set1); + newSet.addAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the difference of two sets as an unmodifiable set. + * Removes all element from set1 that are contained in set2. + * This method is used to find difference for HashSets. For TreeSets with + * strict order requirement, recommended method is + * {@link #differenceInTreeSets(Set, Set)}. + */ + public static <E> Set<E> difference( + final Set<E> set1, final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new HashSet<>(set1); + newSet.removeAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the difference of two Tree sets as an unmodifiable set. + * Removes all element from set1 that are contained in set2. + */ + public static <E> Set<E> differenceInTreeSets( + final Set<E> set1, final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new TreeSet<>(set1); + newSet.removeAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the symmetric difference of two sets as an unmodifiable set. + */ + public static <E> Set<E> symmetricDifference( + final Set<E> set1, final Set<E> set2) { Review comment: include a definition of symmetric difference, e.g. `The symmetric difference is defined as elements that are in set1 or set2, but not in both.` ########## File path: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Sets.java ########## @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.classification.InterfaceAudience; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Static utility methods pertaining to {@link Set} instances. + * This class is Hadoop's internal use alternative to Guava's Sets + * utility class. + */ [email protected] +public final class Sets { + + private Sets() { + // empty + } + + /** + * Creates a <i>mutable</i>, initially empty {@code HashSet} instance. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSet#of() + * instead. If {@code E} is an {@link Enum} type, use {@link EnumSet#noneOf} + * instead. Otherwise, strongly consider using a {@code LinkedHashSet} + * instead, at the cost of increased memory footprint, to get + * deterministic iteration behavior. + */ + public static <E> HashSet<E> newHashSet() { + return new HashSet<E>(); + } + + /** + * Creates a <i>mutable</i>, empty {@code TreeSet} instance sorted by the + * natural sort ordering of its elements. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSortedSet#of() + * instead. + * + * @return a new, empty {@code TreeSet} + */ + public static <E extends Comparable> TreeSet<E> newTreeSet() { + return new TreeSet<E>(); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance initially containing + * the given elements. + * + * <p><b>Note:</b> if elements are non-null and won't be added or removed + * after this point, use ImmutableSet#of() or ImmutableSet#copyOf(Object[]) + * instead. If {@code E} is an {@link Enum} type, use + * {@link EnumSet#of(Enum, Enum[])} instead. Otherwise, strongly consider + * using a {@code LinkedHashSet} instead, at the cost of increased memory + * footprint, to get deterministic iteration behavior. + * + * <p>This method is just a small convenience, either for + * {@code newHashSet(}{@link Arrays#asList}{@code (...))}, or for creating an + * empty set then calling {@link Collections#addAll}. + */ + @SafeVarargs + public static <E> HashSet<E> newHashSet(E... elements) { + HashSet<E> set = newHashSetWithExpectedSize(elements.length); + Collections.addAll(set, elements); + return set; + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set then calling + * {@link Collection#addAll} or Iterables#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterable) instead. (Or, change + * {@code elements} to be a FluentIterable and call {@code elements.toSet()}.) + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, use + * newEnumSet(Iterable, Class) instead. + */ + public static <E> HashSet<E> newHashSet(Iterable<? extends E> elements) { + return (elements instanceof Collection) + ? new HashSet<E>(cast(elements)) + : newHashSet(elements.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code TreeSet} instance containing the given + * elements sorted by their natural ordering. + * + * <p><b>Note:</b> if mutability is not required, use + * ImmutableSortedSet#copyOf(Iterable) instead. + * + * <p><b>Note:</b> If {@code elements} is a {@code SortedSet} with an + * explicit comparator, this method has different behavior than + * {@link TreeSet#TreeSet(SortedSet)}, which returns a {@code TreeSet} + * with that comparator. + * + * <p><b>Note for Java 7 and later:</b> this method is now unnecessary and + * should be treated as deprecated. Instead, use the {@code TreeSet} + * constructor directly, taking advantage of the new + * <a href="http://goo.gl/iz2Wi">"diamond" syntax</a>. + * + * <p>This method is just a small convenience for creating an empty set and + * then calling Iterables#addAll. This method is not very useful and will + * likely be deprecated in the future. + * + * @param elements the elements that the set should contain + * @return a new {@code TreeSet} containing those elements (minus duplicates) + */ + public static <E extends Comparable> TreeSet<E> newTreeSet( + Iterable<? extends E> elements) { + TreeSet<E> set = newTreeSet(); + addAll(set, elements); + return set; + } + + private static <E extends Comparable> boolean addAll(TreeSet<E> addTo, + Iterable<? extends E> elementsToAdd) { + if (elementsToAdd instanceof Collection) { + Collection<? extends E> c = cast(elementsToAdd); + return addTo.addAll(c); + } + if (elementsToAdd == null) { + throw new NullPointerException(); + } + return addAll(addTo, elementsToAdd.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set and then + * calling Iterators#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterator) instead. + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, you should create + * an {@link EnumSet} instead. + * + * <p>Overall, this method is not very useful and will likely be deprecated + * in the future. + */ + public static <E> HashSet<E> newHashSet(Iterator<? extends E> elements) { + HashSet<E> set = newHashSet(); + addAll(set, elements); + return set; + } + + /** + * Returns a new hash set using the smallest initial table size that can hold + * {@code expectedSize} elements without resizing. Note that this is not what + * {@link HashSet#HashSet(int)} does, but it is what most users want and + * expect it to do. + * + * <p>This behavior can't be broadly guaranteed, but has been tested with + * OpenJDK 1.7 and 1.8. + * + * @param expectedSize the number of elements you expect to add to the + * returned set + * @return a new, empty hash set with enough capacity to hold + * {@code expectedSize} elements without resizing + * @throws IllegalArgumentException if {@code expectedSize} is negative + */ + public static <E> HashSet<E> newHashSetWithExpectedSize(int expectedSize) { + return new HashSet<E>(expectedSize); + } Review comment: the javadoc for `newHashSetWithExpectedSize` says that this method call to `HashSet#HashSet(int)` won't fulfill the contract of this method. If the handful of places we call this method can be reworked to somehow not rely on the behavior, we should do that. There's very few of them, and e.g. the use in `FsDatasetImpl` should probably just be calling `new HashSet(Collection)`. If that doesn't work for whatever reason, then I recommend copying the implementation of Guava's `Maps.capacity` here as a private method and using it, since that's what the guava version of this method does. ########## File path: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Sets.java ########## @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.classification.InterfaceAudience; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Static utility methods pertaining to {@link Set} instances. + * This class is Hadoop's internal use alternative to Guava's Sets + * utility class. + */ [email protected] +public final class Sets { + + private Sets() { + // empty + } + + /** + * Creates a <i>mutable</i>, initially empty {@code HashSet} instance. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSet#of() + * instead. If {@code E} is an {@link Enum} type, use {@link EnumSet#noneOf} + * instead. Otherwise, strongly consider using a {@code LinkedHashSet} + * instead, at the cost of increased memory footprint, to get + * deterministic iteration behavior. + */ + public static <E> HashSet<E> newHashSet() { + return new HashSet<E>(); + } + + /** + * Creates a <i>mutable</i>, empty {@code TreeSet} instance sorted by the + * natural sort ordering of its elements. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSortedSet#of() + * instead. + * + * @return a new, empty {@code TreeSet} + */ + public static <E extends Comparable> TreeSet<E> newTreeSet() { + return new TreeSet<E>(); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance initially containing + * the given elements. + * + * <p><b>Note:</b> if elements are non-null and won't be added or removed + * after this point, use ImmutableSet#of() or ImmutableSet#copyOf(Object[]) + * instead. If {@code E} is an {@link Enum} type, use + * {@link EnumSet#of(Enum, Enum[])} instead. Otherwise, strongly consider + * using a {@code LinkedHashSet} instead, at the cost of increased memory + * footprint, to get deterministic iteration behavior. + * + * <p>This method is just a small convenience, either for + * {@code newHashSet(}{@link Arrays#asList}{@code (...))}, or for creating an + * empty set then calling {@link Collections#addAll}. + */ + @SafeVarargs + public static <E> HashSet<E> newHashSet(E... elements) { + HashSet<E> set = newHashSetWithExpectedSize(elements.length); + Collections.addAll(set, elements); + return set; + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set then calling + * {@link Collection#addAll} or Iterables#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterable) instead. (Or, change + * {@code elements} to be a FluentIterable and call {@code elements.toSet()}.) + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, use + * newEnumSet(Iterable, Class) instead. + */ + public static <E> HashSet<E> newHashSet(Iterable<? extends E> elements) { + return (elements instanceof Collection) + ? new HashSet<E>(cast(elements)) + : newHashSet(elements.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code TreeSet} instance containing the given + * elements sorted by their natural ordering. + * + * <p><b>Note:</b> if mutability is not required, use + * ImmutableSortedSet#copyOf(Iterable) instead. + * + * <p><b>Note:</b> If {@code elements} is a {@code SortedSet} with an + * explicit comparator, this method has different behavior than + * {@link TreeSet#TreeSet(SortedSet)}, which returns a {@code TreeSet} + * with that comparator. + * + * <p><b>Note for Java 7 and later:</b> this method is now unnecessary and + * should be treated as deprecated. Instead, use the {@code TreeSet} + * constructor directly, taking advantage of the new + * <a href="http://goo.gl/iz2Wi">"diamond" syntax</a>. + * + * <p>This method is just a small convenience for creating an empty set and + * then calling Iterables#addAll. This method is not very useful and will + * likely be deprecated in the future. + * + * @param elements the elements that the set should contain + * @return a new {@code TreeSet} containing those elements (minus duplicates) + */ + public static <E extends Comparable> TreeSet<E> newTreeSet( + Iterable<? extends E> elements) { + TreeSet<E> set = newTreeSet(); + addAll(set, elements); + return set; + } + + private static <E extends Comparable> boolean addAll(TreeSet<E> addTo, + Iterable<? extends E> elementsToAdd) { + if (elementsToAdd instanceof Collection) { + Collection<? extends E> c = cast(elementsToAdd); + return addTo.addAll(c); + } + if (elementsToAdd == null) { + throw new NullPointerException(); + } + return addAll(addTo, elementsToAdd.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set and then + * calling Iterators#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterator) instead. + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, you should create + * an {@link EnumSet} instead. + * + * <p>Overall, this method is not very useful and will likely be deprecated + * in the future. + */ + public static <E> HashSet<E> newHashSet(Iterator<? extends E> elements) { + HashSet<E> set = newHashSet(); + addAll(set, elements); + return set; + } + + /** + * Returns a new hash set using the smallest initial table size that can hold + * {@code expectedSize} elements without resizing. Note that this is not what + * {@link HashSet#HashSet(int)} does, but it is what most users want and + * expect it to do. + * + * <p>This behavior can't be broadly guaranteed, but has been tested with + * OpenJDK 1.7 and 1.8. + * + * @param expectedSize the number of elements you expect to add to the + * returned set + * @return a new, empty hash set with enough capacity to hold + * {@code expectedSize} elements without resizing + * @throws IllegalArgumentException if {@code expectedSize} is negative + */ + public static <E> HashSet<E> newHashSetWithExpectedSize(int expectedSize) { + return new HashSet<E>(expectedSize); + } + + private static <E> Collection<E> cast(Iterable<E> iterable) { + return (Collection<E>) iterable; + } + + private static <E> boolean addAll(Collection<E> addTo, + Iterator<? extends E> iterator) { + if (addTo == null) { + throw new NullPointerException(); + } + if (iterator == null) { + throw new NullPointerException(); + } + boolean wasModified = false; + while (iterator.hasNext()) { + wasModified |= addTo.add(iterator.next()); + } + return wasModified; + } + + /** + * Returns the intersection of two sets as an unmodifiable set. + */ + public static <E> Set<E> intersection(final Set<E> set1, + final Set<E> set2) { Review comment: please add ```the intersection is defined as elements that are in both set1 and set2.``` ########## File path: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Sets.java ########## @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.classification.InterfaceAudience; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Static utility methods pertaining to {@link Set} instances. + * This class is Hadoop's internal use alternative to Guava's Sets + * utility class. + */ [email protected] +public final class Sets { + + private Sets() { + // empty + } + + /** + * Creates a <i>mutable</i>, initially empty {@code HashSet} instance. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSet#of() + * instead. If {@code E} is an {@link Enum} type, use {@link EnumSet#noneOf} + * instead. Otherwise, strongly consider using a {@code LinkedHashSet} + * instead, at the cost of increased memory footprint, to get + * deterministic iteration behavior. + */ + public static <E> HashSet<E> newHashSet() { + return new HashSet<E>(); + } + + /** + * Creates a <i>mutable</i>, empty {@code TreeSet} instance sorted by the + * natural sort ordering of its elements. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSortedSet#of() + * instead. + * + * @return a new, empty {@code TreeSet} + */ + public static <E extends Comparable> TreeSet<E> newTreeSet() { + return new TreeSet<E>(); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance initially containing + * the given elements. + * + * <p><b>Note:</b> if elements are non-null and won't be added or removed + * after this point, use ImmutableSet#of() or ImmutableSet#copyOf(Object[]) + * instead. If {@code E} is an {@link Enum} type, use + * {@link EnumSet#of(Enum, Enum[])} instead. Otherwise, strongly consider + * using a {@code LinkedHashSet} instead, at the cost of increased memory + * footprint, to get deterministic iteration behavior. + * + * <p>This method is just a small convenience, either for + * {@code newHashSet(}{@link Arrays#asList}{@code (...))}, or for creating an + * empty set then calling {@link Collections#addAll}. + */ + @SafeVarargs + public static <E> HashSet<E> newHashSet(E... elements) { + HashSet<E> set = newHashSetWithExpectedSize(elements.length); + Collections.addAll(set, elements); + return set; + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set then calling + * {@link Collection#addAll} or Iterables#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterable) instead. (Or, change + * {@code elements} to be a FluentIterable and call {@code elements.toSet()}.) + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, use + * newEnumSet(Iterable, Class) instead. + */ + public static <E> HashSet<E> newHashSet(Iterable<? extends E> elements) { + return (elements instanceof Collection) + ? new HashSet<E>(cast(elements)) + : newHashSet(elements.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code TreeSet} instance containing the given + * elements sorted by their natural ordering. + * + * <p><b>Note:</b> if mutability is not required, use + * ImmutableSortedSet#copyOf(Iterable) instead. + * + * <p><b>Note:</b> If {@code elements} is a {@code SortedSet} with an + * explicit comparator, this method has different behavior than + * {@link TreeSet#TreeSet(SortedSet)}, which returns a {@code TreeSet} + * with that comparator. + * + * <p><b>Note for Java 7 and later:</b> this method is now unnecessary and + * should be treated as deprecated. Instead, use the {@code TreeSet} + * constructor directly, taking advantage of the new + * <a href="http://goo.gl/iz2Wi">"diamond" syntax</a>. + * + * <p>This method is just a small convenience for creating an empty set and + * then calling Iterables#addAll. This method is not very useful and will + * likely be deprecated in the future. + * + * @param elements the elements that the set should contain + * @return a new {@code TreeSet} containing those elements (minus duplicates) + */ + public static <E extends Comparable> TreeSet<E> newTreeSet( + Iterable<? extends E> elements) { + TreeSet<E> set = newTreeSet(); + addAll(set, elements); + return set; + } + + private static <E extends Comparable> boolean addAll(TreeSet<E> addTo, + Iterable<? extends E> elementsToAdd) { + if (elementsToAdd instanceof Collection) { + Collection<? extends E> c = cast(elementsToAdd); + return addTo.addAll(c); + } + if (elementsToAdd == null) { + throw new NullPointerException(); + } + return addAll(addTo, elementsToAdd.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set and then + * calling Iterators#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterator) instead. + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, you should create + * an {@link EnumSet} instead. + * + * <p>Overall, this method is not very useful and will likely be deprecated + * in the future. + */ + public static <E> HashSet<E> newHashSet(Iterator<? extends E> elements) { + HashSet<E> set = newHashSet(); + addAll(set, elements); + return set; + } + + /** + * Returns a new hash set using the smallest initial table size that can hold + * {@code expectedSize} elements without resizing. Note that this is not what + * {@link HashSet#HashSet(int)} does, but it is what most users want and + * expect it to do. + * + * <p>This behavior can't be broadly guaranteed, but has been tested with + * OpenJDK 1.7 and 1.8. + * + * @param expectedSize the number of elements you expect to add to the + * returned set + * @return a new, empty hash set with enough capacity to hold + * {@code expectedSize} elements without resizing + * @throws IllegalArgumentException if {@code expectedSize} is negative + */ + public static <E> HashSet<E> newHashSetWithExpectedSize(int expectedSize) { + return new HashSet<E>(expectedSize); + } + + private static <E> Collection<E> cast(Iterable<E> iterable) { + return (Collection<E>) iterable; + } + + private static <E> boolean addAll(Collection<E> addTo, + Iterator<? extends E> iterator) { + if (addTo == null) { + throw new NullPointerException(); + } + if (iterator == null) { + throw new NullPointerException(); + } + boolean wasModified = false; + while (iterator.hasNext()) { + wasModified |= addTo.add(iterator.next()); + } + return wasModified; + } + + /** + * Returns the intersection of two sets as an unmodifiable set. + */ + public static <E> Set<E> intersection(final Set<E> set1, + final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new HashSet<>(set1); + newSet.retainAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the symmetric difference of two sets as an unmodifiable set. + * Should be used with HashSet only. + */ + public static <E> Set<E> union( + final Set<E> set1, final Set<E> set2) { Review comment: looks like Javadoc for the wrong method? ########## File path: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Sets.java ########## @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.classification.InterfaceAudience; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Static utility methods pertaining to {@link Set} instances. + * This class is Hadoop's internal use alternative to Guava's Sets + * utility class. + */ [email protected] +public final class Sets { + + private Sets() { + // empty + } + + /** + * Creates a <i>mutable</i>, initially empty {@code HashSet} instance. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSet#of() + * instead. If {@code E} is an {@link Enum} type, use {@link EnumSet#noneOf} + * instead. Otherwise, strongly consider using a {@code LinkedHashSet} + * instead, at the cost of increased memory footprint, to get + * deterministic iteration behavior. + */ + public static <E> HashSet<E> newHashSet() { + return new HashSet<E>(); + } + + /** + * Creates a <i>mutable</i>, empty {@code TreeSet} instance sorted by the + * natural sort ordering of its elements. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSortedSet#of() + * instead. + * + * @return a new, empty {@code TreeSet} + */ + public static <E extends Comparable> TreeSet<E> newTreeSet() { + return new TreeSet<E>(); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance initially containing + * the given elements. + * + * <p><b>Note:</b> if elements are non-null and won't be added or removed + * after this point, use ImmutableSet#of() or ImmutableSet#copyOf(Object[]) + * instead. If {@code E} is an {@link Enum} type, use + * {@link EnumSet#of(Enum, Enum[])} instead. Otherwise, strongly consider + * using a {@code LinkedHashSet} instead, at the cost of increased memory + * footprint, to get deterministic iteration behavior. + * + * <p>This method is just a small convenience, either for + * {@code newHashSet(}{@link Arrays#asList}{@code (...))}, or for creating an + * empty set then calling {@link Collections#addAll}. + */ + @SafeVarargs + public static <E> HashSet<E> newHashSet(E... elements) { + HashSet<E> set = newHashSetWithExpectedSize(elements.length); + Collections.addAll(set, elements); + return set; + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set then calling + * {@link Collection#addAll} or Iterables#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterable) instead. (Or, change + * {@code elements} to be a FluentIterable and call {@code elements.toSet()}.) + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, use + * newEnumSet(Iterable, Class) instead. + */ + public static <E> HashSet<E> newHashSet(Iterable<? extends E> elements) { + return (elements instanceof Collection) + ? new HashSet<E>(cast(elements)) + : newHashSet(elements.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code TreeSet} instance containing the given + * elements sorted by their natural ordering. + * + * <p><b>Note:</b> if mutability is not required, use + * ImmutableSortedSet#copyOf(Iterable) instead. + * + * <p><b>Note:</b> If {@code elements} is a {@code SortedSet} with an + * explicit comparator, this method has different behavior than + * {@link TreeSet#TreeSet(SortedSet)}, which returns a {@code TreeSet} + * with that comparator. + * + * <p><b>Note for Java 7 and later:</b> this method is now unnecessary and + * should be treated as deprecated. Instead, use the {@code TreeSet} + * constructor directly, taking advantage of the new + * <a href="http://goo.gl/iz2Wi">"diamond" syntax</a>. + * + * <p>This method is just a small convenience for creating an empty set and + * then calling Iterables#addAll. This method is not very useful and will + * likely be deprecated in the future. + * + * @param elements the elements that the set should contain + * @return a new {@code TreeSet} containing those elements (minus duplicates) + */ + public static <E extends Comparable> TreeSet<E> newTreeSet( + Iterable<? extends E> elements) { + TreeSet<E> set = newTreeSet(); + addAll(set, elements); + return set; + } + + private static <E extends Comparable> boolean addAll(TreeSet<E> addTo, + Iterable<? extends E> elementsToAdd) { + if (elementsToAdd instanceof Collection) { + Collection<? extends E> c = cast(elementsToAdd); + return addTo.addAll(c); + } + if (elementsToAdd == null) { + throw new NullPointerException(); + } + return addAll(addTo, elementsToAdd.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set and then + * calling Iterators#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterator) instead. + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, you should create + * an {@link EnumSet} instead. + * + * <p>Overall, this method is not very useful and will likely be deprecated + * in the future. + */ + public static <E> HashSet<E> newHashSet(Iterator<? extends E> elements) { + HashSet<E> set = newHashSet(); + addAll(set, elements); + return set; + } + + /** + * Returns a new hash set using the smallest initial table size that can hold + * {@code expectedSize} elements without resizing. Note that this is not what + * {@link HashSet#HashSet(int)} does, but it is what most users want and + * expect it to do. + * + * <p>This behavior can't be broadly guaranteed, but has been tested with + * OpenJDK 1.7 and 1.8. + * + * @param expectedSize the number of elements you expect to add to the + * returned set + * @return a new, empty hash set with enough capacity to hold + * {@code expectedSize} elements without resizing + * @throws IllegalArgumentException if {@code expectedSize} is negative + */ + public static <E> HashSet<E> newHashSetWithExpectedSize(int expectedSize) { + return new HashSet<E>(expectedSize); + } + + private static <E> Collection<E> cast(Iterable<E> iterable) { + return (Collection<E>) iterable; + } + + private static <E> boolean addAll(Collection<E> addTo, + Iterator<? extends E> iterator) { + if (addTo == null) { + throw new NullPointerException(); + } + if (iterator == null) { + throw new NullPointerException(); + } + boolean wasModified = false; + while (iterator.hasNext()) { + wasModified |= addTo.add(iterator.next()); + } + return wasModified; + } + + /** + * Returns the intersection of two sets as an unmodifiable set. + */ + public static <E> Set<E> intersection(final Set<E> set1, + final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new HashSet<>(set1); + newSet.retainAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the symmetric difference of two sets as an unmodifiable set. + * Should be used with HashSet only. + */ + public static <E> Set<E> union( + final Set<E> set1, final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new HashSet<>(set1); + newSet.addAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the difference of two sets as an unmodifiable set. + * Removes all element from set1 that are contained in set2. + * This method is used to find difference for HashSets. For TreeSets with + * strict order requirement, recommended method is + * {@link #differenceInTreeSets(Set, Set)}. + */ + public static <E> Set<E> difference( + final Set<E> set1, final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new HashSet<>(set1); + newSet.removeAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the difference of two Tree sets as an unmodifiable set. + * Removes all element from set1 that are contained in set2. + */ + public static <E> Set<E> differenceInTreeSets( + final Set<E> set1, final Set<E> set2) { Review comment: This doesn't modify set1, so recommend being clear in phrasing, e.g. `the difference is defined as elements that are in set1 but not in set2` ########## File path: hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Sets.java ########## @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import org.apache.hadoop.classification.InterfaceAudience; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Static utility methods pertaining to {@link Set} instances. + * This class is Hadoop's internal use alternative to Guava's Sets + * utility class. + */ [email protected] +public final class Sets { + + private Sets() { + // empty + } + + /** + * Creates a <i>mutable</i>, initially empty {@code HashSet} instance. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSet#of() + * instead. If {@code E} is an {@link Enum} type, use {@link EnumSet#noneOf} + * instead. Otherwise, strongly consider using a {@code LinkedHashSet} + * instead, at the cost of increased memory footprint, to get + * deterministic iteration behavior. + */ + public static <E> HashSet<E> newHashSet() { + return new HashSet<E>(); + } + + /** + * Creates a <i>mutable</i>, empty {@code TreeSet} instance sorted by the + * natural sort ordering of its elements. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSortedSet#of() + * instead. + * + * @return a new, empty {@code TreeSet} + */ + public static <E extends Comparable> TreeSet<E> newTreeSet() { + return new TreeSet<E>(); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance initially containing + * the given elements. + * + * <p><b>Note:</b> if elements are non-null and won't be added or removed + * after this point, use ImmutableSet#of() or ImmutableSet#copyOf(Object[]) + * instead. If {@code E} is an {@link Enum} type, use + * {@link EnumSet#of(Enum, Enum[])} instead. Otherwise, strongly consider + * using a {@code LinkedHashSet} instead, at the cost of increased memory + * footprint, to get deterministic iteration behavior. + * + * <p>This method is just a small convenience, either for + * {@code newHashSet(}{@link Arrays#asList}{@code (...))}, or for creating an + * empty set then calling {@link Collections#addAll}. + */ + @SafeVarargs + public static <E> HashSet<E> newHashSet(E... elements) { + HashSet<E> set = newHashSetWithExpectedSize(elements.length); + Collections.addAll(set, elements); + return set; + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set then calling + * {@link Collection#addAll} or Iterables#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterable) instead. (Or, change + * {@code elements} to be a FluentIterable and call {@code elements.toSet()}.) + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, use + * newEnumSet(Iterable, Class) instead. + */ + public static <E> HashSet<E> newHashSet(Iterable<? extends E> elements) { + return (elements instanceof Collection) + ? new HashSet<E>(cast(elements)) + : newHashSet(elements.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code TreeSet} instance containing the given + * elements sorted by their natural ordering. + * + * <p><b>Note:</b> if mutability is not required, use + * ImmutableSortedSet#copyOf(Iterable) instead. + * + * <p><b>Note:</b> If {@code elements} is a {@code SortedSet} with an + * explicit comparator, this method has different behavior than + * {@link TreeSet#TreeSet(SortedSet)}, which returns a {@code TreeSet} + * with that comparator. + * + * <p><b>Note for Java 7 and later:</b> this method is now unnecessary and + * should be treated as deprecated. Instead, use the {@code TreeSet} + * constructor directly, taking advantage of the new + * <a href="http://goo.gl/iz2Wi">"diamond" syntax</a>. + * + * <p>This method is just a small convenience for creating an empty set and + * then calling Iterables#addAll. This method is not very useful and will + * likely be deprecated in the future. + * + * @param elements the elements that the set should contain + * @return a new {@code TreeSet} containing those elements (minus duplicates) + */ + public static <E extends Comparable> TreeSet<E> newTreeSet( + Iterable<? extends E> elements) { + TreeSet<E> set = newTreeSet(); + addAll(set, elements); + return set; + } + + private static <E extends Comparable> boolean addAll(TreeSet<E> addTo, + Iterable<? extends E> elementsToAdd) { + if (elementsToAdd instanceof Collection) { + Collection<? extends E> c = cast(elementsToAdd); + return addTo.addAll(c); + } + if (elementsToAdd == null) { + throw new NullPointerException(); + } + return addAll(addTo, elementsToAdd.iterator()); + } + + /** + * Creates a <i>mutable</i> {@code HashSet} instance containing the given + * elements. A very thin convenience for creating an empty set and then + * calling Iterators#addAll. + * + * <p><b>Note:</b> if mutability is not required and the elements are + * non-null, use ImmutableSet#copyOf(Iterator) instead. + * + * <p><b>Note:</b> if {@code E} is an {@link Enum} type, you should create + * an {@link EnumSet} instead. + * + * <p>Overall, this method is not very useful and will likely be deprecated + * in the future. + */ + public static <E> HashSet<E> newHashSet(Iterator<? extends E> elements) { + HashSet<E> set = newHashSet(); + addAll(set, elements); + return set; + } + + /** + * Returns a new hash set using the smallest initial table size that can hold + * {@code expectedSize} elements without resizing. Note that this is not what + * {@link HashSet#HashSet(int)} does, but it is what most users want and + * expect it to do. + * + * <p>This behavior can't be broadly guaranteed, but has been tested with + * OpenJDK 1.7 and 1.8. + * + * @param expectedSize the number of elements you expect to add to the + * returned set + * @return a new, empty hash set with enough capacity to hold + * {@code expectedSize} elements without resizing + * @throws IllegalArgumentException if {@code expectedSize} is negative + */ + public static <E> HashSet<E> newHashSetWithExpectedSize(int expectedSize) { + return new HashSet<E>(expectedSize); + } + + private static <E> Collection<E> cast(Iterable<E> iterable) { + return (Collection<E>) iterable; + } + + private static <E> boolean addAll(Collection<E> addTo, + Iterator<? extends E> iterator) { + if (addTo == null) { + throw new NullPointerException(); + } + if (iterator == null) { + throw new NullPointerException(); + } + boolean wasModified = false; + while (iterator.hasNext()) { + wasModified |= addTo.add(iterator.next()); + } + return wasModified; + } + + /** + * Returns the intersection of two sets as an unmodifiable set. + */ + public static <E> Set<E> intersection(final Set<E> set1, + final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new HashSet<>(set1); + newSet.retainAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the symmetric difference of two sets as an unmodifiable set. + * Should be used with HashSet only. + */ + public static <E> Set<E> union( + final Set<E> set1, final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new HashSet<>(set1); + newSet.addAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the difference of two sets as an unmodifiable set. + * Removes all element from set1 that are contained in set2. + * This method is used to find difference for HashSets. For TreeSets with + * strict order requirement, recommended method is + * {@link #differenceInTreeSets(Set, Set)}. + */ + public static <E> Set<E> difference( + final Set<E> set1, final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new HashSet<>(set1); + newSet.removeAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the difference of two Tree sets as an unmodifiable set. + * Removes all element from set1 that are contained in set2. + */ + public static <E> Set<E> differenceInTreeSets( + final Set<E> set1, final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> newSet = new TreeSet<>(set1); + newSet.removeAll(set2); + return Collections.unmodifiableSet(newSet); + } + + /** + * Returns the symmetric difference of two sets as an unmodifiable set. + */ + public static <E> Set<E> symmetricDifference( + final Set<E> set1, final Set<E> set2) { + if (set1 == null) { + throw new NullPointerException("set1"); + } + if (set2 == null) { + throw new NullPointerException("set2"); + } + Set<E> intersection = new HashSet<>(set1); + intersection.retainAll(set2); + Set<E> symmetricDifference = new HashSet<>(set1); + symmetricDifference.addAll(set2); + symmetricDifference.removeAll(intersection); + return Collections.unmodifiableSet(symmetricDifference); + } + + /** + * Creates a thread-safe set backed by a hash map. The set is backed by a + * {@link ConcurrentHashMap} instance, and thus carries the same concurrency + * guarantees. + * + * <p>Unlike {@code HashSet}, this class does NOT allow {@code null} to be + * used as an element. The set is serializable. + * + * @return a new, empty thread-safe {@code Set} + */ + public static <E> Set<E> newConcurrentHashSet() { + return Collections.newSetFromMap(new ConcurrentHashMap<E, Boolean>()); + } + + /** + * Creates a <i>mutable</i>, empty {@code LinkedHashSet} instance. + * + * <p><b>Note:</b> if mutability is not required, use ImmutableSet#of() + * instead. + * + * @return a new, empty {@code LinkedHashSet} + */ + public static <E> LinkedHashSet<E> newLinkedHashSet() { + return new LinkedHashSet<E>(); + } Review comment: For this "make an empty set" implementation, what's the advantage of having an implementation here rather than following the advice from Guava for jdk7+ to use the constructor directly? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
