Updated Branches: refs/heads/master 711eaedff -> a9a65eae1
CRUNCH-118: Minor cleanup. Project: http://git-wip-us.apache.org/repos/asf/incubator-crunch/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-crunch/commit/a9a65eae Tree: http://git-wip-us.apache.org/repos/asf/incubator-crunch/tree/a9a65eae Diff: http://git-wip-us.apache.org/repos/asf/incubator-crunch/diff/a9a65eae Branch: refs/heads/master Commit: a9a65eae1fd3f8f350587d7499a7ceaee3be0a24 Parents: affa10f Author: Matthias Friedrich <[email protected]> Authored: Sat Nov 24 11:37:46 2012 +0100 Committer: Matthias Friedrich <[email protected]> Committed: Sat Nov 24 11:37:46 2012 +0100 ---------------------------------------------------------------------- .../java/org/apache/crunch/fn/Aggregators.java | 2 +- .../main/java/org/apache/crunch/lib/Distinct.java | 2 +- .../java/org/apache/crunch/fn/AggregatorsTest.java | 6 +--- .../java/org/apache/crunch/lib/DistinctTest.java | 21 ++++++++++++-- 4 files changed, 22 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a9a65eae/crunch/src/main/java/org/apache/crunch/fn/Aggregators.java ---------------------------------------------------------------------- diff --git a/crunch/src/main/java/org/apache/crunch/fn/Aggregators.java b/crunch/src/main/java/org/apache/crunch/fn/Aggregators.java index 524983d..0ac79e2 100644 --- a/crunch/src/main/java/org/apache/crunch/fn/Aggregators.java +++ b/crunch/src/main/java/org/apache/crunch/fn/Aggregators.java @@ -391,7 +391,7 @@ public final class Aggregators { } /** - * Collect a random sample of unique elements from the input, where 'unique' is defined by + * Collect a sample of unique elements from the input, where 'unique' is defined by * the {@code equals} method for the input objects. No guarantees are made about which * elements will be returned, simply that there will not be any more than the given sample * size for any key. http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a9a65eae/crunch/src/main/java/org/apache/crunch/lib/Distinct.java ---------------------------------------------------------------------- diff --git a/crunch/src/main/java/org/apache/crunch/lib/Distinct.java b/crunch/src/main/java/org/apache/crunch/lib/Distinct.java index dae11f7..15f7205 100644 --- a/crunch/src/main/java/org/apache/crunch/lib/Distinct.java +++ b/crunch/src/main/java/org/apache/crunch/lib/Distinct.java @@ -91,7 +91,7 @@ public final class Distinct { } } - private static class PostDistinctFn<S> extends DoFn<Pair<S, java.lang.Iterable<java.lang.Void>>, S> { + private static class PostDistinctFn<S> extends DoFn<Pair<S, Iterable<Void>>, S> { @Override public void process(Pair<S, Iterable<Void>> input, Emitter<S> emitter) { emitter.emit(input.first()); http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a9a65eae/crunch/src/test/java/org/apache/crunch/fn/AggregatorsTest.java ---------------------------------------------------------------------- diff --git a/crunch/src/test/java/org/apache/crunch/fn/AggregatorsTest.java b/crunch/src/test/java/org/apache/crunch/fn/AggregatorsTest.java index d7daec1..6ee1972 100644 --- a/crunch/src/test/java/org/apache/crunch/fn/AggregatorsTest.java +++ b/crunch/src/test/java/org/apache/crunch/fn/AggregatorsTest.java @@ -38,7 +38,6 @@ import static org.apache.crunch.fn.Aggregators.SUM_LONGS; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.is; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThat; import java.math.BigInteger; @@ -131,9 +130,8 @@ public class AggregatorsTest { is(ImmutableSet.of(17, 29, 16))); Iterable<Integer> samp = apply(Aggregators.<Integer>SAMPLE_UNIQUE_ELEMENTS(2), 17, 29, 16, 17, 29, 16); - List<Integer> elements = ImmutableList.copyOf(samp); - assertEquals(2, elements.size()); - assertFalse(elements.get(0).equals(elements.get(1))); + assertThat(Iterables.size(samp), is(2)); + assertThat(ImmutableSet.copyOf(samp).size(), is(2)); // check that the two elements are unique } @Test http://git-wip-us.apache.org/repos/asf/incubator-crunch/blob/a9a65eae/crunch/src/test/java/org/apache/crunch/lib/DistinctTest.java ---------------------------------------------------------------------- diff --git a/crunch/src/test/java/org/apache/crunch/lib/DistinctTest.java b/crunch/src/test/java/org/apache/crunch/lib/DistinctTest.java index 4c9d816..8c0b3bf 100644 --- a/crunch/src/test/java/org/apache/crunch/lib/DistinctTest.java +++ b/crunch/src/test/java/org/apache/crunch/lib/DistinctTest.java @@ -19,6 +19,9 @@ package org.apache.crunch.lib; import static org.junit.Assert.assertEquals; +import java.util.Arrays; +import java.util.List; + import org.apache.crunch.PCollection; import org.apache.crunch.impl.mem.MemPipeline; import org.apache.crunch.types.avro.Avros; @@ -27,11 +30,23 @@ import org.junit.Test; import com.google.common.collect.ImmutableSet; public class DistinctTest { + private static final List<Integer> DATA = Arrays.asList( + 17, 29, 17, 29, 17, 29, 36, 45, 17, 45, 36, 29 + ); + @Test public void testDistinct() { - PCollection<Integer> input = MemPipeline.typedCollectionOf(Avros.ints(), - 17, 29, 17, 29, 17, 29, 36, 45, 17, 45, 36, 29); + PCollection<Integer> input = MemPipeline.typedCollectionOf(Avros.ints(), DATA); Iterable<Integer> unique = Distinct.distinct(input).materialize(); - assertEquals(ImmutableSet.of(17, 29, 36, 45), ImmutableSet.copyOf(unique)); + + assertEquals(ImmutableSet.copyOf(DATA), ImmutableSet.copyOf(unique)); + } + + @Test + public void testDistinctFlush() { + PCollection<Integer> input = MemPipeline.typedCollectionOf(Avros.ints(), DATA); + Iterable<Integer> unique = Distinct.distinct(input, 2).materialize(); + + assertEquals(ImmutableSet.copyOf(DATA), ImmutableSet.copyOf(unique)); } }
