http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/join/MultiAvroSchemaJoinIT.java ---------------------------------------------------------------------- diff --git a/crunch/src/it/java/org/apache/crunch/lib/join/MultiAvroSchemaJoinIT.java b/crunch/src/it/java/org/apache/crunch/lib/join/MultiAvroSchemaJoinIT.java deleted file mode 100644 index f1ca770..0000000 --- a/crunch/src/it/java/org/apache/crunch/lib/join/MultiAvroSchemaJoinIT.java +++ /dev/null @@ -1,121 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.crunch.lib.join; - -import static org.apache.crunch.types.avro.Avros.records; -import static org.apache.crunch.types.avro.Avros.strings; -import static org.junit.Assert.assertEquals; - -import java.io.File; -import java.util.List; - -import org.apache.avro.Schema; -import org.apache.avro.file.DataFileWriter; -import org.apache.avro.io.DatumWriter; -import org.apache.avro.specific.SpecificDatumWriter; -import org.apache.avro.specific.SpecificRecord; -import org.apache.crunch.MapFn; -import org.apache.crunch.PCollection; -import org.apache.crunch.Pair; -import org.apache.crunch.Pipeline; -import org.apache.crunch.impl.mr.MRPipeline; -import org.apache.crunch.io.From; -import org.apache.crunch.test.Employee; -import org.apache.crunch.test.Person; -import org.apache.crunch.test.TemporaryPath; -import org.apache.crunch.test.TemporaryPaths; -import org.junit.After; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; - -public class MultiAvroSchemaJoinIT { - - private File personFile; - private File employeeFile; - @Rule - public TemporaryPath tmpDir = TemporaryPaths.create(); - - @Before - public void setUp() throws Exception { - this.personFile = File.createTempFile("person", ".avro"); - this.employeeFile = File.createTempFile("employee", ".avro"); - - DatumWriter<Person> pdw = new SpecificDatumWriter<Person>(); - DataFileWriter<Person> pfw = new DataFileWriter<Person>(pdw); - pfw.create(Person.SCHEMA$, personFile); - Person p1 = new Person(); - p1.name = "Josh"; - p1.age = 19; - p1.siblingnames = ImmutableList.<CharSequence> of("Kate", "Mike"); - pfw.append(p1); - Person p2 = new Person(); - p2.name = "Kate"; - p2.age = 17;; - p2.siblingnames = ImmutableList.<CharSequence> of("Josh", "Mike"); - pfw.append(p2); - Person p3 = new Person(); - p3.name = "Mike"; - p3.age = 12; - p3.siblingnames = ImmutableList.<CharSequence> of("Josh", "Kate"); - pfw.append(p3); - pfw.close(); - - DatumWriter<Employee> edw = new SpecificDatumWriter<Employee>(); - DataFileWriter<Employee> efw = new DataFileWriter<Employee>(edw); - efw.create(Employee.SCHEMA$, employeeFile); - Employee e1 = new Employee(); - e1.name = "Kate"; - e1.salary = 100000; - e1.department = "Marketing"; - efw.append(e1); - efw.close(); - } - - @After - public void tearDown() throws Exception { - personFile.delete(); - employeeFile.delete(); - } - - public static class NameFn<K extends SpecificRecord> extends MapFn<K, String> { - @Override - public String map(K input) { - Schema s = input.getSchema(); - Schema.Field f = s.getField("name"); - return input.get(f.pos()).toString(); - } - } - - @Test - public void testJoin() throws Exception { - Pipeline p = new MRPipeline(MultiAvroSchemaJoinIT.class, tmpDir.getDefaultConfiguration()); - PCollection<Person> people = p.read(From.avroFile(personFile.getAbsolutePath(), records(Person.class))); - PCollection<Employee> employees = p.read(From.avroFile(employeeFile.getAbsolutePath(), records(Employee.class))); - - Iterable<Pair<Person, Employee>> result = people.by(new NameFn<Person>(), strings()) - .join(employees.by(new NameFn<Employee>(), strings())).values().materialize(); - List<Pair<Person, Employee>> v = Lists.newArrayList(result); - assertEquals(1, v.size()); - assertEquals("Kate", v.get(0).first().name.toString()); - assertEquals("Kate", v.get(0).second().name.toString()); - } -}
http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/lib/join/RightOuterJoinIT.java ---------------------------------------------------------------------- diff --git a/crunch/src/it/java/org/apache/crunch/lib/join/RightOuterJoinIT.java b/crunch/src/it/java/org/apache/crunch/lib/join/RightOuterJoinIT.java deleted file mode 100644 index d889b61..0000000 --- a/crunch/src/it/java/org/apache/crunch/lib/join/RightOuterJoinIT.java +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.crunch.lib.join; - -import static org.junit.Assert.assertTrue; - -import org.apache.crunch.Pair; -import org.apache.crunch.types.PTypeFamily; - -public class RightOuterJoinIT extends JoinTester { - @Override - public void assertPassed(Iterable<Pair<String, Long>> lines) { - boolean passed1 = false; - boolean passed2 = true; - boolean passed3 = false; - for (Pair<String, Long> line : lines) { - if ("wretched".equals(line.first()) && 24 == line.second()) { - passed1 = true; - } - if ("againe".equals(line.first())) { - passed2 = false; - } - if ("Montparnasse.".equals(line.first()) && 2 == line.second()) { - passed3 = true; - } - } - assertTrue(passed1); - assertTrue(passed2); - assertTrue(passed3); - } - - @Override - protected JoinFn<String, Long, Long> getJoinFn(PTypeFamily typeFamily) { - return new RightOuterJoinFn<String, Long, Long>(typeFamily.strings(), typeFamily.longs()); - } -} http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/test/TemporaryPaths.java ---------------------------------------------------------------------- diff --git a/crunch/src/it/java/org/apache/crunch/test/TemporaryPaths.java b/crunch/src/it/java/org/apache/crunch/test/TemporaryPaths.java deleted file mode 100644 index 97cf0de..0000000 --- a/crunch/src/it/java/org/apache/crunch/test/TemporaryPaths.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.crunch.test; - -import org.apache.crunch.impl.mr.run.RuntimeParameters; -import org.apache.hadoop.conf.Configuration; - - -/** - * Utilities for working with {@link TemporaryPath}. - */ -public final class TemporaryPaths { - - /** - * Static factory returning a {@link TemporaryPath} with adjusted - * {@link Configuration} properties. - */ - public static TemporaryPath create() { - return new TemporaryPath(RuntimeParameters.TMP_DIR, "hadoop.tmp.dir"); - } - - private TemporaryPaths() { - // nothing - } -} http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/java/org/apache/crunch/test/Tests.java ---------------------------------------------------------------------- diff --git a/crunch/src/it/java/org/apache/crunch/test/Tests.java b/crunch/src/it/java/org/apache/crunch/test/Tests.java deleted file mode 100644 index e381c1a..0000000 --- a/crunch/src/it/java/org/apache/crunch/test/Tests.java +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.crunch.test; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkNotNull; - -import java.io.IOException; -import java.util.Collection; - -import org.apache.crunch.Pipeline; -import org.apache.crunch.impl.mem.MemPipeline; -import org.apache.crunch.impl.mr.MRPipeline; -import org.apache.hadoop.io.Writable; -import org.junit.runners.Parameterized.Parameters; - -import com.google.common.collect.ImmutableList; -import com.google.common.io.ByteArrayDataOutput; -import com.google.common.io.ByteStreams; -import com.google.common.io.Resources; - - -/** - * Utilities for integration tests. - */ -public final class Tests { - - private Tests() { - // nothing - } - - /** - * Get the path to and integration test resource file, as per naming convention. - * - * @param testCase The executing test case instance - * @param resourceName The file name of the resource - * @return The path to the resource (never null) - * @throws IllegalArgumentException Thrown if the resource doesn't exist - */ - public static String pathTo(Object testCase, String resourceName) { - String qualifiedName = resource(testCase, resourceName); - return Resources.getResource(qualifiedName).getFile(); - } - - /** - * This doesn't check whether the resource exists! - * - * @param testCase - * @param resourceName - * @return The path to the resource (never null) - */ - public static String resource(Object testCase, String resourceName) { - checkNotNull(testCase); - checkNotNull(resourceName); - - // Note: We append "Data" because otherwise Eclipse would complain about the - // the case's class name clashing with the resource directory's name. - return testCase.getClass().getName().replaceAll("\\.", "/") + "Data/" + resourceName; - } - - /** - * Return our two types of {@link Pipeline}s for a JUnit Parameterized test. - * - * @param testCase The executing test case's class - * @return The collection to return from a {@link Parameters} provider method - */ - public static Collection<Object[]> pipelinesParams(Class<?> testCase) { - return ImmutableList.copyOf( - new Object[][] { { MemPipeline.getInstance() }, { new MRPipeline(testCase) } - }); - } - - /** - * Serialize the given Writable into a byte array. - * - * @param value The instance to serialize - * @return The serialized data - */ - public static byte[] serialize(Writable value) { - checkNotNull(value); - try { - ByteArrayDataOutput out = ByteStreams.newDataOutput(); - value.write(out); - return out.toByteArray(); - } catch (IOException e) { - throw new IllegalStateException("cannot serialize", e); - } - } - - /** - * Serialize the src Writable into a byte array, then deserialize it into dest. - * @param src The instance to serialize - * @param dest The instance to deserialize into - * @return dest, for convenience - */ - public static <T extends Writable> T roundtrip(Writable src, T dest) { - checkNotNull(src); - checkNotNull(dest); - checkArgument(src != dest, "src and dest may not be the same instance"); - - try { - byte[] data = serialize(src); - dest.readFields(ByteStreams.newDataInput(data)); - } catch (IOException e) { - throw new IllegalStateException("cannot deserialize", e); - } - return dest; - } -} http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/customers.txt ---------------------------------------------------------------------- diff --git a/crunch/src/it/resources/customers.txt b/crunch/src/it/resources/customers.txt deleted file mode 100644 index 98f3f3d..0000000 --- a/crunch/src/it/resources/customers.txt +++ /dev/null @@ -1,4 +0,0 @@ -111|John Doe -222|Jane Doe -333|Someone Else -444|Has No Orders \ No newline at end of file http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/docs.txt ---------------------------------------------------------------------- diff --git a/crunch/src/it/resources/docs.txt b/crunch/src/it/resources/docs.txt deleted file mode 100644 index 90a3f65..0000000 --- a/crunch/src/it/resources/docs.txt +++ /dev/null @@ -1,6 +0,0 @@ -A this doc has this text -A and this text as well -A but also this -B this doc has some text -B but not as much as the last -B doc http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/emptyTextFile.txt ---------------------------------------------------------------------- diff --git a/crunch/src/it/resources/emptyTextFile.txt b/crunch/src/it/resources/emptyTextFile.txt deleted file mode 100644 index e69de29..0000000 http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/letters.txt ---------------------------------------------------------------------- diff --git a/crunch/src/it/resources/letters.txt b/crunch/src/it/resources/letters.txt deleted file mode 100644 index 916bfc9..0000000 --- a/crunch/src/it/resources/letters.txt +++ /dev/null @@ -1,2 +0,0 @@ -a -bb \ No newline at end of file http://git-wip-us.apache.org/repos/asf/crunch/blob/890e0086/crunch/src/it/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/crunch/src/it/resources/log4j.properties b/crunch/src/it/resources/log4j.properties deleted file mode 100644 index 5d144a0..0000000 --- a/crunch/src/it/resources/log4j.properties +++ /dev/null @@ -1,29 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# ***** Set root logger level to INFO and its only appender to A. -log4j.logger.org.apache.crunch=info, A - -# Log warnings on Hadoop for the local runner when testing -log4j.logger.org.apache.hadoop=warn, A -# Except for Configuration, which is chatty. -log4j.logger.org.apache.hadoop.conf.Configuration=error, A - -# ***** A is set to be a ConsoleAppender. -log4j.appender.A=org.apache.log4j.ConsoleAppender -# ***** A uses PatternLayout. -log4j.appender.A.layout=org.apache.log4j.PatternLayout -log4j.appender.A.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
