Repository: crunch Updated Branches: refs/heads/master dee0fcf51 -> 363c8243b
http://git-wip-us.apache.org/repos/asf/crunch/blob/363c8243/crunch-hive/src/test/java/org/apache/crunch/test/orc/pojos/AddressBook.java ---------------------------------------------------------------------- diff --git a/crunch-hive/src/test/java/org/apache/crunch/test/orc/pojos/AddressBook.java b/crunch-hive/src/test/java/org/apache/crunch/test/orc/pojos/AddressBook.java new file mode 100644 index 0000000..1a52273 --- /dev/null +++ b/crunch-hive/src/test/java/org/apache/crunch/test/orc/pojos/AddressBook.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.crunch.test.orc.pojos; + +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +public class AddressBook { + + public static final String TYPE_STR = "struct<myname:string,mynumbers:array<string>," + + "contacts:map<string," + Person.TYPE_STR + ">,updatetime:timestamp,signature:binary>"; + public static final TypeInfo TYPE_INFO = TypeInfoUtils.getTypeInfoFromTypeString(TYPE_STR); + + private String myName; + + private List<String> myNumbers; + private Map<String, Person> contacts; + + private Timestamp updateTime; + private byte[] signature; + + public AddressBook() {} + + public AddressBook(String myName, List<String> myNumbers, + Map<String, Person> contacts, Timestamp updateTime, byte[] signature) { + super(); + this.myName = myName; + this.myNumbers = myNumbers; + this.contacts = contacts; + this.updateTime = updateTime; + this.signature = signature; + } + + public String getMyName() { + return myName; + } + + public void setMyName(String myName) { + this.myName = myName; + } + + public List<String> getMyNumbers() { + return myNumbers; + } + + public void setMyNumbers(List<String> myNumbers) { + this.myNumbers = myNumbers; + } + + public Map<String, Person> getContacts() { + return contacts; + } + + public void setContacts(Map<String, Person> contacts) { + this.contacts = contacts; + } + + public Timestamp getUpdateTime() { + return updateTime; + } + + public void setUpdateTime(Timestamp updateTime) { + this.updateTime = updateTime; + } + + public byte[] getSignature() { + return signature; + } + + public void setSignature(byte[] signature) { + this.signature = signature; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((contacts == null) ? 0 : contacts.hashCode()); + result = prime * result + ((myName == null) ? 0 : myName.hashCode()); + result = prime * result + ((myNumbers == null) ? 0 : myNumbers.hashCode()); + result = prime * result + Arrays.hashCode(signature); + result = prime * result + + ((updateTime == null) ? 0 : updateTime.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + AddressBook other = (AddressBook) obj; + if (contacts == null) { + if (other.contacts != null) + return false; + } else if (!contacts.equals(other.contacts)) + return false; + if (myName == null) { + if (other.myName != null) + return false; + } else if (!myName.equals(other.myName)) + return false; + if (myNumbers == null) { + if (other.myNumbers != null) + return false; + } else if (!myNumbers.equals(other.myNumbers)) + return false; + if (!Arrays.equals(signature, other.signature)) + return false; + if (updateTime == null) { + if (other.updateTime != null) + return false; + } else if (!updateTime.equals(other.updateTime)) + return false; + return true; + } + +} http://git-wip-us.apache.org/repos/asf/crunch/blob/363c8243/crunch-hive/src/test/java/org/apache/crunch/test/orc/pojos/Person.java ---------------------------------------------------------------------- diff --git a/crunch-hive/src/test/java/org/apache/crunch/test/orc/pojos/Person.java b/crunch-hive/src/test/java/org/apache/crunch/test/orc/pojos/Person.java new file mode 100644 index 0000000..176b148 --- /dev/null +++ b/crunch-hive/src/test/java/org/apache/crunch/test/orc/pojos/Person.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.crunch.test.orc.pojos; + +import java.util.List; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +public class Person { + + public static final String TYPE_STR = "struct<name:string,age:int,number:array<string>>"; + public static final TypeInfo TYPE_INFO = TypeInfoUtils.getTypeInfoFromTypeString(TYPE_STR); + + private String name; + private int age; + private List<String> numbers; + + public Person() {} + + public Person(String name, int age, List<String> numbers) { + super(); + this.name = name; + this.age = age; + this.numbers = numbers; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public int getAge() { + return age; + } + + public void setAge(int age) { + this.age = age; + } + + public List<String> getNumbers() { + return numbers; + } + + public void setNumbers(List<String> numbers) { + this.numbers = numbers; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + age; + result = prime * result + ((name == null) ? 0 : name.hashCode()); + result = prime * result + ((numbers == null) ? 0 : numbers.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Person other = (Person) obj; + if (age != other.age) + return false; + if (name == null) { + if (other.name != null) + return false; + } else if (!name.equals(other.name)) + return false; + if (numbers == null) { + if (other.numbers != null) + return false; + } else if (!numbers.equals(other.numbers)) + return false; + return true; + } + +} http://git-wip-us.apache.org/repos/asf/crunch/blob/363c8243/crunch-hive/src/test/java/org/apache/crunch/types/orc/OrcsTest.java ---------------------------------------------------------------------- diff --git a/crunch-hive/src/test/java/org/apache/crunch/types/orc/OrcsTest.java b/crunch-hive/src/test/java/org/apache/crunch/types/orc/OrcsTest.java new file mode 100644 index 0000000..5b92f29 --- /dev/null +++ b/crunch-hive/src/test/java/org/apache/crunch/types/orc/OrcsTest.java @@ -0,0 +1,141 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.crunch.types.orc; + +import static org.junit.Assert.*; + +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import org.apache.crunch.Pair; +import org.apache.crunch.TupleN; +import org.apache.crunch.io.orc.OrcWritable; +import org.apache.crunch.test.orc.pojos.AddressBook; +import org.apache.crunch.test.orc.pojos.Person; +import org.apache.crunch.types.PType; +import org.apache.crunch.types.writable.Writables; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.io.orc.OrcStruct; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.junit.Test; + +public class OrcsTest { + + @SuppressWarnings({ "unchecked", "rawtypes" }) + protected static void testInputOutputFn(PType ptype, Object java, OrcWritable orc) { + initialize(ptype); + assertEquals(java, ptype.getInputMapFn().map(orc)); + assertEquals(orc, ptype.getOutputMapFn().map(java)); + } + + private static void initialize(PType ptype) { + ptype.getInputMapFn().initialize(); + ptype.getOutputMapFn().initialize(); + } + + @Test + public void testOrcs() { + String mapValueTypeStr = "struct<a:string,b:int>"; + String typeStr = "struct<a:int,b:string,c:float,d:varchar(64)" + + ",e:map<string," + mapValueTypeStr + ">>"; + TypeInfo mapValueTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(mapValueTypeStr); + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeStr); + PType<OrcStruct> ptype = Orcs.orcs(typeInfo); + + HiveVarchar varchar = new HiveVarchar("Hello World", 32); + Map<Text, OrcStruct> map = new HashMap<Text, OrcStruct>(); + OrcStruct value = OrcUtils.createOrcStruct(mapValueTypeInfo, new Text("age"), new IntWritable(24)); + map.put(new Text("Bob"), value); + OrcStruct s = OrcUtils.createOrcStruct(typeInfo, new IntWritable(1024), new Text("Alice"), + null, new HiveVarcharWritable(varchar), map); + OrcWritable w = new OrcWritable(); + w.set(s); + + testInputOutputFn(ptype, s, w); + } + + @Test + public void testReflects() { + PType<AddressBook> ptype = Orcs.reflects(AddressBook.class); + + AddressBook ab = new AddressBook(); + ab.setMyName("John Smith"); + ab.setMyNumbers(Arrays.asList("919-333-4452", "650-777-4329")); + Map<String, Person> contacts = new HashMap<String, Person>(); + contacts.put("Alice", new Person("Alice", 23, Arrays.asList("666-677-9999"))); + contacts.put("Bob", new Person("Bob", 26, Arrays.asList("999-888-1132", "000-222-9934"))); + contacts.put("David", null); + ab.setContacts(contacts); + Timestamp now = new Timestamp(System.currentTimeMillis()); + ab.setUpdateTime(now); + byte[] signature = {0, 0, 64, 68, 39, 0}; + ab.setSignature(signature); + + Map<Text, OrcStruct> map = new HashMap<Text, OrcStruct>(); + map.put(new Text("Alice"), OrcUtils.createOrcStruct(Person.TYPE_INFO, new Text("Alice"), new IntWritable(23), + Arrays.asList(new Text("666-677-9999")))); + map.put(new Text("Bob"), OrcUtils.createOrcStruct(Person.TYPE_INFO, new Text("Bob"), new IntWritable(26), + Arrays.asList(new Text("999-888-1132"), new Text("000-222-9934")))); + map.put(new Text("David"), null); + OrcStruct s = OrcUtils.createOrcStruct(AddressBook.TYPE_INFO, new Text("John Smith"), + Arrays.asList(new Text("919-333-4452"), new Text("650-777-4329")), map, new TimestampWritable(now), + new BytesWritable(signature)); + OrcWritable w = new OrcWritable(); + w.set(s); + + testInputOutputFn(ptype, ab, w); + } + + @Test + public void testTuples() { + PType<TupleN> ptype = Orcs.tuples(Writables.ints(), Writables.strings(), Orcs.reflects(Person.class), + Writables.tableOf(Writables.strings(), Orcs.reflects(Person.class))); + + TupleN t = new TupleN(1, "John Smith", new Person("Alice", 23, Arrays.asList("666-677-9999")), + new Pair<String, Person>("Bob", new Person("Bob", 26, Arrays.asList("999-888-1132", "000-222-9934")))); + + String typeStr = "struct<a:int,b:string,c:" + Person.TYPE_STR + ",d:struct<d1:string,d2:" + Person.TYPE_STR + ">>"; + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeStr); + String tableTypeStr = "struct<a:string,b:" + Person.TYPE_STR + ">"; + TypeInfo tableTypeInfo = TypeInfoUtils.getTypeInfoFromTypeString(tableTypeStr); + + OrcStruct s = OrcUtils.createOrcStruct(typeInfo, new IntWritable(1), new Text("John Smith"), + OrcUtils.createOrcStruct(Person.TYPE_INFO, new Text("Alice"), new IntWritable(23), + Arrays.asList(new Text("666-677-9999")) + ), + OrcUtils.createOrcStruct(tableTypeInfo, new Text("Bob"), + OrcUtils.createOrcStruct(Person.TYPE_INFO, new Text("Bob"), new IntWritable(26), + Arrays.asList(new Text("999-888-1132"), new Text("000-222-9934")) + ) + ) + ); + OrcWritable w = new OrcWritable(); + w.set(s); + + testInputOutputFn(ptype, t, w); + } + +} http://git-wip-us.apache.org/repos/asf/crunch/blob/363c8243/crunch-hive/src/test/java/org/apache/crunch/types/orc/TupleObjectInspectorTest.java ---------------------------------------------------------------------- diff --git a/crunch-hive/src/test/java/org/apache/crunch/types/orc/TupleObjectInspectorTest.java b/crunch-hive/src/test/java/org/apache/crunch/types/orc/TupleObjectInspectorTest.java new file mode 100644 index 0000000..01666e2 --- /dev/null +++ b/crunch-hive/src/test/java/org/apache/crunch/types/orc/TupleObjectInspectorTest.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.crunch.types.orc; + +import static org.junit.Assert.*; + +import java.nio.ByteBuffer; +import java.util.List; + +import org.apache.crunch.Pair; +import org.apache.crunch.TupleN; +import org.apache.crunch.types.TupleFactory; +import org.apache.crunch.types.orc.TupleObjectInspector.ByteBufferObjectInspector; +import org.apache.crunch.types.writable.Writables; +import org.apache.hadoop.io.BytesWritable; +import org.junit.Test; + +public class TupleObjectInspectorTest { + + @Test + public void testTupleObjectInspector() { + // test get + TupleObjectInspector<TupleN> toi = new TupleObjectInspector<TupleN>(TupleFactory.TUPLEN, + Writables.strings(), Writables.ints(), Writables.floats()); + TupleN tuple = new TupleN("Alice", 28, 165.2f); + List<Object> values = toi.getStructFieldsDataAsList(tuple); + assertEquals("Alice", values.get(0)); + assertEquals(28, values.get(1)); + assertEquals(165.2f, values.get(2)); + + // test create + TupleN newTuple = toi.create("Alice", 28, 165.2f); + assertEquals(tuple, newTuple); + TupleObjectInspector<Pair> poi = new TupleObjectInspector<Pair>(TupleFactory.PAIR, + Writables.strings(), Writables.ints()); + Pair pair = poi.create("word", 29); + assertEquals("word", pair.first()); + assertEquals(29, pair.second()); + } + + @Test + public void testByteBufferObjectInspector() { + byte[] bytes = {0, 9, 4, 18, 64, 6, 1}; + BytesWritable bw = new BytesWritable(bytes); + ByteBuffer buf = ByteBuffer.wrap(bytes); + ByteBufferObjectInspector bboi = new ByteBufferObjectInspector(); + + assertArrayEquals(bytes, bboi.getPrimitiveJavaObject(buf)); + assertEquals(bw, bboi.getPrimitiveWritableObject(buf)); + assertEquals(buf, bboi.create(bytes)); + assertEquals(buf, bboi.create(bw)); + + ByteBuffer newBuf = bboi.copyObject(buf); + assertTrue(buf != newBuf); + assertEquals(buf, newBuf); + } + +} http://git-wip-us.apache.org/repos/asf/crunch/blob/363c8243/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 0503a80..f22d08d 100644 --- a/pom.xml +++ b/pom.xml @@ -52,6 +52,7 @@ under the License. <module>crunch-archetype</module> <module>crunch-scrunch</module> <module>crunch-spark</module> + <module>crunch-hive</module> <module>crunch-dist</module> </modules> @@ -74,6 +75,7 @@ under the License. <commons-logging.version>1.1.1</commons-logging.version> <commons-cli.version>1.2</commons-cli.version> <avro.version>1.7.4</avro.version> + <hive.version>0.13.1</hive.version> <parquet.version>1.3.2</parquet.version> <javassist.version>3.16.1-GA</javassist.version> <jackson.version>1.8.8</jackson.version> @@ -410,6 +412,12 @@ under the License. </dependency> <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-exec</artifactId> + <version>${hive.version}</version> + </dependency> + + <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-api</artifactId> <version>${slf4j.version}</version>
