Repository: incubator-hivemall Updated Branches: refs/heads/master eac48007b -> 1e1b77ea4
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.statistics;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.Collector;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.junit.Assert;
import org.junit.Test;

public class MovingAverageUDTFTest {

    /** Feeds 1..7 through a window-3 moving average and checks each emitted value. */
    @Test
    public void test() throws HiveException {
        final MovingAverageUDTF movingAvg = new MovingAverageUDTF();

        // first argument is the FLOAT value column, second is the constant window size 3
        final ObjectInspector valueOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
        final ObjectInspector windowSizeOI = ObjectInspectorUtils.getConstantObjectInspector(
            PrimitiveObjectInspectorFactory.javaIntObjectInspector, 3);
        movingAvg.initialize(new ObjectInspector[] {valueOI, windowSizeOI});

        // capture each forwarded row (a single DoubleWritable) as a plain double
        final List<Double> collected = new ArrayList<>();
        movingAvg.setCollector(new Collector() {
            @Override
            public void collect(Object input) throws HiveException {
                Object[] row = (Object[]) input;
                Assert.assertEquals(1, row.length);
                Assert.assertTrue(row[0] instanceof DoubleWritable);
                collected.add(((DoubleWritable) row[0]).get());
            }
        });

        for (float v = 1.f; v <= 7.f; v += 1.f) {
            movingAvg.process(new Object[] {v, null});
        }

        // averages of the trailing window: 1/1, (1+2)/2, (1+2+3)/3, (2+3+4)/3, ...
        Assert.assertEquals(Arrays.asList(1.d, 1.5d, 2.d, 3.d, 4.d, 5.d, 6.d), collected);
    }

}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.tools;

import hivemall.utils.hadoop.WritableUtils;

import java.io.IOException;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.Text;
import org.junit.Assert;
import org.junit.Test;

public class TryCastUDFTest {

    /** Equivalent to {@code try_cast(array(1.0,2.0,3.0), 'array<string>')}. */
    @Test
    public void testList() throws IOException, HiveException {
        final TryCastUDF udf = new TryCastUDF();

        final ObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(
            PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
        // target type name is passed as a constant string argument
        final ObjectInspector typeOI =
                PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(
                    TypeInfoFactory.stringTypeInfo, new Text("array<string>"));
        udf.initialize(new ObjectInspector[] {listOI, typeOI});

        final DeferredObject[] args = new DeferredObject[] {new GenericUDF.DeferredJavaObject(
            WritableUtils.toWritableList(new double[] {0.1, 1.1, 2.1}))};
        final Object result = udf.evaluate(args);

        // each double should come back as its string representation
        Assert.assertEquals(WritableUtils.val("0.1", "1.1", "2.1"), result);

        udf.close();
    }

}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.tools.array;

import hivemall.utils.hadoop.WritableUtils;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.junit.Assert;
import org.junit.Test;

public class ArrayAppendUDFTest {

    /**
     * Creates an {@code array_append} UDF initialized for
     * {@code (array<double>, double)} arguments, shared by all tests below.
     */
    private static ArrayAppendUDF initUDF() throws HiveException {
        ArrayAppendUDF udf = new ArrayAppendUDF();
        udf.initialize(new ObjectInspector[] {
                ObjectInspectorFactory.getStandardListObjectInspector(
                    PrimitiveObjectInspectorFactory.writableDoubleObjectInspector),
                PrimitiveObjectInspectorFactory.javaDoubleObjectInspector});
        return udf;
    }

    /** Appending 3.0 to [0,1,2] yields [0,1,2,3]. */
    @Test
    public void testEvaluate() throws HiveException, IOException {
        ArrayAppendUDF udf = initUDF();

        DeferredObject[] args = new DeferredObject[] {
                new GenericUDF.DeferredJavaObject(
                    WritableUtils.toWritableList(new double[] {0, 1, 2})),
                // Double.valueOf instead of the deprecated new Double(...) constructor
                new GenericUDF.DeferredJavaObject(Double.valueOf(3d))};

        List<Object> result = udf.evaluate(args);

        Assert.assertEquals(4, result.size());
        for (int i = 0; i < 4; i++) {
            Assert.assertEquals(new DoubleWritable(i), result.get(i));
        }

        udf.close();
    }

    /** Appending a null element leaves the array unchanged. */
    @Test
    public void testEvaluateAvoidNullAppend() throws HiveException, IOException {
        ArrayAppendUDF udf = initUDF();

        DeferredObject[] args = new DeferredObject[] {
                new GenericUDF.DeferredJavaObject(
                    WritableUtils.toWritableList(new double[] {0, 1, 2})),
                new GenericUDF.DeferredJavaObject(null)};

        List<Object> result = udf.evaluate(args);

        Assert.assertEquals(3, result.size());
        for (int i = 0; i < 3; i++) {
            Assert.assertEquals(new DoubleWritable(i), result.get(i));
        }

        udf.close();
    }

    /** A null array argument propagates to a null result. */
    @Test
    public void testEvaluateReturnNull() throws HiveException, IOException {
        ArrayAppendUDF udf = initUDF();

        DeferredObject[] args = new DeferredObject[] {new GenericUDF.DeferredJavaObject(null),
                new GenericUDF.DeferredJavaObject(Double.valueOf(3d))};

        List<Object> result = udf.evaluate(args);

        Assert.assertNull(result);

        udf.close();
    }

}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.tools.array;

import hivemall.utils.hadoop.WritableUtils;

import java.io.IOException;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.junit.Assert;
import org.junit.Test;

public class ArrayElementAtUDFTest {

    /**
     * element_at over array<double>: in-range index, out-of-range index (null),
     * and negative index (counted from the end).
     */
    @Test
    public void testDouble() throws IOException, HiveException {
        ArrayElementAtUDF udf = new ArrayElementAtUDF();

        udf.initialize(new ObjectInspector[] {
                ObjectInspectorFactory.getStandardListObjectInspector(
                    PrimitiveObjectInspectorFactory.writableDoubleObjectInspector),
                PrimitiveObjectInspectorFactory.javaIntObjectInspector});

        // Integer.valueOf instead of the deprecated new Integer(...) constructor
        DeferredObject[] args = new DeferredObject[] {
                new GenericUDF.DeferredJavaObject(
                    WritableUtils.toWritableList(new double[] {0, 1, 2})),
                new GenericUDF.DeferredJavaObject(Integer.valueOf(1))};

        Assert.assertEquals(new DoubleWritable(1), udf.evaluate(args));

        // index beyond the last element yields null
        args = new DeferredObject[] {
                new GenericUDF.DeferredJavaObject(
                    WritableUtils.toWritableList(new double[] {0, 1, 2})),
                new GenericUDF.DeferredJavaObject(Integer.valueOf(4))};
        Assert.assertNull(udf.evaluate(args));

        // negative index counts back from the end: -2 -> second-to-last
        args = new DeferredObject[] {
                new GenericUDF.DeferredJavaObject(
                    WritableUtils.toWritableList(new double[] {0, 1, 2})),
                new GenericUDF.DeferredJavaObject(Integer.valueOf(-2))};
        Assert.assertEquals(new DoubleWritable(1), udf.evaluate(args));

        udf.close();
    }

    /** element_at over array<string> with an in-range index. */
    @Test
    public void testString() throws IOException, HiveException {
        ArrayElementAtUDF udf = new ArrayElementAtUDF();

        udf.initialize(new ObjectInspector[] {
                ObjectInspectorFactory.getStandardListObjectInspector(
                    PrimitiveObjectInspectorFactory.writableStringObjectInspector),
                PrimitiveObjectInspectorFactory.javaIntObjectInspector});

        DeferredObject[] args = new DeferredObject[] {
                new GenericUDF.DeferredJavaObject(WritableUtils.val("s0", "s1", "s2")),
                new GenericUDF.DeferredJavaObject(1)};

        Assert.assertEquals(WritableUtils.val("s1"), udf.evaluate(args));

        udf.close();
    }

}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.tools.array;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.junit.Assert;
import org.junit.Test;

public class ArrayFlattenUDFTest {

    /** Flattening [[0,1,2,3],[4,5],[6,7]] yields [0..7] in order. */
    @Test
    public void testEvaluate() throws HiveException, IOException {
        ArrayFlattenUDF udf = new ArrayFlattenUDF();

        udf.initialize(new ObjectInspector[] {ObjectInspectorFactory.getStandardListObjectInspector(
            ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaIntObjectInspector))});

        DeferredObject[] args = new DeferredObject[] {new GenericUDF.DeferredJavaObject(
            Arrays.asList(Arrays.asList(0, 1, 2, 3), Arrays.asList(4, 5), Arrays.asList(6, 7)))};

        List<Object> result = udf.evaluate(args);

        Assert.assertEquals(8, result.size());
        for (int i = 0; i < 8; i++) {
            // Integer.valueOf instead of the deprecated new Integer(...) constructor
            Assert.assertEquals(Integer.valueOf(i), result.get(i));
        }

        udf.close();
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.tools.array;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.IntWritable;
import org.junit.Assert;
import org.junit.Test;

public class ArraySliceUDFTest {

    /**
     * Exercises subarray() with several (offset, length) combinations that all
     * produce a non-null slice.  The same IntWritable cells are mutated between
     * evaluate() calls, mirroring how Hive reuses writables across rows.
     */
    @Test
    public void testNonNullReturn() throws IOException, HiveException {
        final ArraySliceUDF udf = new ArraySliceUDF();

        udf.initialize(new ObjectInspector[] {
                ObjectInspectorFactory.getStandardListObjectInspector(
                    PrimitiveObjectInspectorFactory.javaStringObjectInspector),
                PrimitiveObjectInspectorFactory.writableIntObjectInspector,
                PrimitiveObjectInspectorFactory.writableIntObjectInspector});

        final IntWritable sliceOffset = new IntWritable();
        final IntWritable sliceLength = new IntWritable();
        final DeferredObject offsetArg = new GenericUDF.DeferredJavaObject(sliceOffset);
        final DeferredObject lengthArg = new GenericUDF.DeferredJavaObject(sliceLength);
        final DeferredObject nullArg = new GenericUDF.DeferredJavaObject(null);

        final List<String> input = Arrays.asList("zero", "one", "two", "three", "four", "five",
            "six", "seven", "eight", "nine", "ten");
        final DeferredObject[] args = new DeferredObject[] {
                new GenericUDF.DeferredJavaObject(input), offsetArg, lengthArg};

        // plain prefix slice
        sliceOffset.set(0);
        sliceLength.set(3);
        List<Object> actual = udf.evaluate(args);
        Assert.assertEquals(Arrays.asList("zero", "one", "two"), actual);

        // negative length: drop that many elements from the tail
        sliceOffset.set(1);
        sliceLength.set(-2);
        actual = udf.evaluate(args);
        Assert.assertEquals(
            Arrays.asList("one", "two", "three", "four", "five", "six", "seven", "eight"), actual);

        // zero length yields an empty slice
        sliceOffset.set(1);
        sliceLength.set(0);
        actual = udf.evaluate(args);
        Assert.assertEquals(Collections.emptyList(), actual);

        sliceOffset.set(-1);
        sliceLength.set(0);
        actual = udf.evaluate(args);
        Assert.assertEquals(Collections.emptyList(), actual);

        // omitted length (null third arg): slice runs to the end of the array
        sliceOffset.set(6);
        args[2] = nullArg;
        actual = udf.evaluate(args);
        Assert.assertEquals(Arrays.asList("six", "seven", "eight", "nine", "ten"), actual);

        udf.close();
    }

    /** An offset before the start of the array yields a null result. */
    @Test
    public void testNullReturn() throws IOException, HiveException {
        final ArraySliceUDF udf = new ArraySliceUDF();

        udf.initialize(new ObjectInspector[] {
                ObjectInspectorFactory.getStandardListObjectInspector(
                    PrimitiveObjectInspectorFactory.javaStringObjectInspector),
                PrimitiveObjectInspectorFactory.writableIntObjectInspector,
                PrimitiveObjectInspectorFactory.writableIntObjectInspector});

        final IntWritable sliceOffset = new IntWritable();
        final IntWritable sliceLength = new IntWritable();
        final DeferredObject[] args = new DeferredObject[] {
                new GenericUDF.DeferredJavaObject(
                    Arrays.asList("zero", "one", "two", "three", "four", "five", "six", "seven",
                        "eight", "nine", "ten")),
                new GenericUDF.DeferredJavaObject(sliceOffset),
                new GenericUDF.DeferredJavaObject(sliceLength)};

        // -12 is past the front of an 11-element array
        sliceOffset.set(-12);
        sliceLength.set(0);
        List<Object> actual = udf.evaluate(args);
        Assert.assertNull(actual);

        udf.close();
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.tools.array;

import hivemall.utils.hadoop.WritableUtils;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.junit.Assert;
import org.junit.Test;

public class ArrayUnionUDFTest {

    /** Union of three overlapping double arrays removes duplicates: {0,1,2,3,4,5}. */
    @Test
    public void testUnion3() throws HiveException, IOException {
        final ArrayUnionUDF udf = new ArrayUnionUDF();

        // three array<double> arguments
        final ObjectInspector doubleListOI =
                ObjectInspectorFactory.getStandardListObjectInspector(
                    PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
        udf.initialize(new ObjectInspector[] {doubleListOI, doubleListOI, doubleListOI});

        final DeferredObject[] args = new DeferredObject[] {
                new GenericUDF.DeferredJavaObject(
                    WritableUtils.toWritableList(new double[] {0, 1, 2})),
                new GenericUDF.DeferredJavaObject(
                    WritableUtils.toWritableList(new double[] {2, 3, 4})),
                new GenericUDF.DeferredJavaObject(
                    WritableUtils.toWritableList(new double[] {4, 5}))};

        final List<Object> result = udf.evaluate(args);

        Assert.assertEquals(6, result.size());
        Assert.assertEquals(WritableUtils.toWritableList(new double[] {0, 1, 2, 3, 4, 5}), result);

        udf.close();
    }

}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.tools.array;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.Collector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.junit.Assert;
import org.junit.Test;

public class ConditionalEmitUDTFTest {

    /**
     * conditional_emit(array<boolean>, array<string>) forwards only the values
     * whose corresponding condition is true.
     */
    @Test
    public void test() throws HiveException {
        ConditionalEmitUDTF udtf = new ConditionalEmitUDTF();

        udtf.initialize(new ObjectInspector[] {
                ObjectInspectorFactory.getStandardListObjectInspector(
                    PrimitiveObjectInspectorFactory.javaBooleanObjectInspector),
                ObjectInspectorFactory.getStandardListObjectInspector(
                    PrimitiveObjectInspectorFactory.javaStringObjectInspector),});

        final List<Object> actual = new ArrayList<>();
        udtf.setCollector(new Collector() {
            @Override
            public void collect(Object input) throws HiveException {
                // fixed local-variable typo: fowardObj -> forwardObj
                Object[] forwardObj = (Object[]) input;
                Assert.assertEquals(1, forwardObj.length);
                actual.add(forwardObj[0]);
            }
        });

        udtf.process(
            new Object[] {Arrays.asList(true, false, true), Arrays.asList("one", "two", "three")});

        Assert.assertEquals(Arrays.asList("one", "three"), actual);

        actual.clear();

        udtf.process(
            new Object[] {Arrays.asList(true, true, false), Arrays.asList("one", "two", "three")});
        Assert.assertEquals(Arrays.asList("one", "two"), actual);

        udtf.close();
    }

}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.tools.array;

import hivemall.utils.hadoop.WritableUtils;

import java.io.IOException;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.junit.Assert;
import org.junit.Test;

public class FirstElementUDFTest {

    /** first_element([0,1,2]) returns 0. */
    @Test
    public void test() throws IOException, HiveException {
        final FirstElementUDF firstElement = new FirstElementUDF();

        firstElement.initialize(
            new ObjectInspector[] {ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)});

        final DeferredObject input = new GenericUDF.DeferredJavaObject(
            WritableUtils.toWritableList(new double[] {0, 1, 2}));
        final Object result = firstElement.evaluate(new DeferredObject[] {input});

        Assert.assertEquals(WritableUtils.val(0.d), result);

        firstElement.close();
    }

    /** first_element of an empty array returns null. */
    @Test
    public void testNull() throws IOException, HiveException {
        final FirstElementUDF firstElement = new FirstElementUDF();

        firstElement.initialize(
            new ObjectInspector[] {ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)});

        final DeferredObject emptyInput = new GenericUDF.DeferredJavaObject(
            WritableUtils.toWritableList(new double[] {}));

        Assert.assertNull(firstElement.evaluate(new DeferredObject[] {emptyInput}));

        firstElement.close();
    }

}
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package hivemall.tools.array;

import hivemall.utils.hadoop.WritableUtils;

import java.io.IOException;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.junit.Assert;
import org.junit.Test;

public class LastElementUDFTest {

    /** last_element([0,1,2]) returns 2. */
    @Test
    public void test() throws IOException, HiveException {
        final LastElementUDF lastElement = new LastElementUDF();

        lastElement.initialize(
            new ObjectInspector[] {ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)});

        final DeferredObject input = new GenericUDF.DeferredJavaObject(
            WritableUtils.toWritableList(new double[] {0, 1, 2}));
        final Object result = lastElement.evaluate(new DeferredObject[] {input});

        Assert.assertEquals(WritableUtils.val(2.d), result);

        lastElement.close();
    }

    /** last_element of an empty array returns null. */
    @Test
    public void testNull() throws IOException, HiveException {
        final LastElementUDF lastElement = new LastElementUDF();

        lastElement.initialize(
            new ObjectInspector[] {ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)});

        final DeferredObject emptyInput = new GenericUDF.DeferredJavaObject(
            WritableUtils.toWritableList(new double[] {}));

        Assert.assertNull(lastElement.evaluate(new DeferredObject[] {emptyInput}));

        lastElement.close();
    }

}
0000000..1aa8606 --- /dev/null +++ b/core/src/test/java/hivemall/tools/json/FromJsonUDFTest.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.tools.json; + +import hivemall.utils.hadoop.HiveUtils; + +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; + +public class FromJsonUDFTest { + + @Test + public void testDoubleArray() throws Exception { + FromJsonUDF udf = new FromJsonUDF(); + + String json = "[0.1,1.1,2.2]"; + String types = "array<double>"; + List<Double> expected = Arrays.asList(0.1d, 1.1d, 2.2d); + + ObjectInspector[] argOIs = new ObjectInspector[] { + PrimitiveObjectInspectorFactory.writableStringObjectInspector, + HiveUtils.getConstStringObjectInspector(types)}; + DeferredObject[] args = + new DeferredObject[] {new GenericUDF.DeferredJavaObject(new Text(json)), null}; + + 
udf.initialize(argOIs); + Object result = udf.evaluate(args); + + Assert.assertEquals(expected, result); + + udf.close(); + } + + @SuppressWarnings("unchecked") + @Test + public void testPersonStruct() throws Exception { + FromJsonUDF udf = new FromJsonUDF(); + + String json = "{ \"person\" : { \"name\" : \"makoto\" , \"age\" : 37 } }"; + String types = "struct<name:string,age:int>"; + + ObjectInspector[] argOIs = new ObjectInspector[] { + PrimitiveObjectInspectorFactory.writableStringObjectInspector, + HiveUtils.getConstStringObjectInspector(types), + HiveUtils.getConstStringObjectInspector("person")}; + DeferredObject[] args = + new DeferredObject[] {new GenericUDF.DeferredJavaObject(new Text(json)), null}; + + udf.initialize(argOIs); + List<Object> result = (List<Object>) udf.evaluate(args); + + Assert.assertEquals(2, result.size()); + Assert.assertEquals("makoto", result.get(0)); + Assert.assertEquals(37, result.get(1)); + + udf.close(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java b/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java new file mode 100644 index 0000000..d945cf5 --- /dev/null +++ b/core/src/test/java/hivemall/tools/json/ToJsonUDFTest.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.tools.json; + +import hivemall.utils.hadoop.WritableUtils; + +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.Text; +import org.junit.Assert; +import org.junit.Test; + +public class ToJsonUDFTest { + + @Test + public void testDoubleArray() throws Exception { + ToJsonUDF udf = new ToJsonUDF(); + + ObjectInspector[] argOIs = + new ObjectInspector[] {ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)}; + DeferredObject[] args = new DeferredObject[] {new GenericUDF.DeferredJavaObject( + WritableUtils.toWritableList(new double[] {0.1, 1.1, 2.1}))}; + + udf.initialize(argOIs); + Text serialized = udf.evaluate(args); + + Assert.assertEquals("[0.1,1.1,2.1]", serialized.toString()); + + udf.close(); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/test/java/hivemall/tools/sanity/AssertUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/sanity/AssertUDFTest.java b/core/src/test/java/hivemall/tools/sanity/AssertUDFTest.java new file mode 100644 index 0000000..959238b --- /dev/null +++ 
b/core/src/test/java/hivemall/tools/sanity/AssertUDFTest.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.tools.sanity; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Assert; +import org.junit.Test; + +public class AssertUDFTest { + + @Test + public void testTrue() throws HiveException { + AssertUDF udf = new AssertUDF(); + Assert.assertTrue(udf.evaluate(true, "not error")); + } + + @Test(expected = HiveException.class) + public void testFalse() throws HiveException { + AssertUDF udf = new AssertUDF(); + udf.evaluate(false); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/test/java/hivemall/tools/sanity/RaiseErrorUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/sanity/RaiseErrorUDFTest.java b/core/src/test/java/hivemall/tools/sanity/RaiseErrorUDFTest.java new file mode 100644 index 0000000..004ba26 --- /dev/null +++ b/core/src/test/java/hivemall/tools/sanity/RaiseErrorUDFTest.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.tools.sanity; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +public class RaiseErrorUDFTest { + + @Test(expected = HiveException.class) + public void test() throws HiveException { + RaiseErrorUDF udf = new RaiseErrorUDF(); + udf.evaluate(); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java b/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java new file mode 100644 index 0000000..2f34d0e --- /dev/null +++ b/core/src/test/java/hivemall/tools/vector/VectorAddUDFTest.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.tools.vector; + +import hivemall.utils.hadoop.WritableUtils; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.junit.Assert; +import org.junit.Test; + +public class VectorAddUDFTest { + + @Test + public void testAddDouble() throws IOException, HiveException { + VectorAddUDF udf = new VectorAddUDF(); + + udf.initialize(new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector), + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableFloatObjectInspector)}); + + DeferredObject[] args = new DeferredObject[] { + new GenericUDF.DeferredJavaObject( + WritableUtils.toWritableList(new double[] {1, 2, 3})), + new GenericUDF.DeferredJavaObject( + WritableUtils.toWritableList(new float[] {2, 3, 4}))}; + + List<?> actual = udf.evaluate(args); + List<Double> expected = Arrays.asList(3.d, 5.d, 7.d); + + Assert.assertEquals(expected, actual); + + udf.close(); + } + + @Test + public void testAddLong() throws IOException, HiveException { + 
VectorAddUDF udf = new VectorAddUDF(); + + udf.initialize(new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableLongObjectInspector), + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableIntObjectInspector)}); + + DeferredObject[] args = new DeferredObject[] { + new GenericUDF.DeferredJavaObject( + WritableUtils.toWritableList(new long[] {1, 2, 3})), + new GenericUDF.DeferredJavaObject( + WritableUtils.toWritableList(new int[] {2, 3, 4}))}; + + List<?> actual = udf.evaluate(args); + List<Long> expected = Arrays.asList(3L, 5L, 7L); + + Assert.assertEquals(expected, actual); + + udf.close(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java b/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java new file mode 100644 index 0000000..fb1e10f --- /dev/null +++ b/core/src/test/java/hivemall/tools/vector/VectorDotUDFTest.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.tools.vector; + +import hivemall.utils.hadoop.WritableUtils; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.junit.Assert; +import org.junit.Test; + +public class VectorDotUDFTest { + + @Test + public void testDotp() throws HiveException, IOException { + VectorDotUDF udf = new VectorDotUDF(); + + udf.initialize(new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector), + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableFloatObjectInspector)}); + + DeferredObject[] args = new DeferredObject[] { + new GenericUDF.DeferredJavaObject( + WritableUtils.toWritableList(new double[] {1, 2, 3})), + new GenericUDF.DeferredJavaObject( + WritableUtils.toWritableList(new float[] {2, 3, 4}))}; + + List<Double> actual = udf.evaluate(args); + List<Double> expected = Arrays.asList(2.d, 6.d, 12.d); + + Assert.assertEquals(expected, actual); + + udf.close(); + } + + @Test + public void testDotpScalar() throws HiveException, IOException { + VectorDotUDF udf = new VectorDotUDF(); + + udf.initialize(new ObjectInspector[] { + ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.writableDoubleObjectInspector), + PrimitiveObjectInspectorFactory.writableFloatObjectInspector}); + + DeferredObject[] args = new 
DeferredObject[] { + new GenericUDF.DeferredJavaObject( + WritableUtils.toWritableList(new double[] {1, 2, 3})), + new GenericUDF.DeferredJavaObject(WritableUtils.val(2.f))}; + + List<Double> actual = udf.evaluate(args); + List<Double> expected = Arrays.asList(2.d, 4.d, 6.d); + + Assert.assertEquals(expected, actual); + + udf.close(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/test/java/hivemall/utils/collections/DoubleRingBufferTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/utils/collections/DoubleRingBufferTest.java b/core/src/test/java/hivemall/utils/collections/DoubleRingBufferTest.java index c07a8af..45b16d5 100644 --- a/core/src/test/java/hivemall/utils/collections/DoubleRingBufferTest.java +++ b/core/src/test/java/hivemall/utils/collections/DoubleRingBufferTest.java @@ -128,4 +128,28 @@ public class DoubleRingBufferTest { Assert.assertArrayEquals(new double[] {2, 3, 4}, dst, 0.d); } + @Test + public void testHead() { + DoubleRingBuffer ring = new DoubleRingBuffer(3); + Assert.assertTrue(ring.isEmpty()); + Assert.assertEquals(0.d, ring.head(), 0.d); + Assert.assertEquals(0, ring.size()); + ring.add(1.d); + Assert.assertEquals(1, ring.size()); + ring.add(2.d); + Assert.assertEquals(2, ring.size()); + ring.add(3.d); + Assert.assertEquals(3, ring.size()); + Assert.assertTrue(ring.isFull()); + Assert.assertEquals(1.d, ring.head(), 0.d); + Assert.assertEquals(3, ring.size()); + ring.add(4.d); + Assert.assertEquals(2.d, ring.head(), 0.d); + ring.add(5.d); + Assert.assertEquals(3.d, ring.head(), 0.d); + ring.add(6.d); + Assert.assertEquals(4.d, ring.head(), 0.d); + Assert.assertEquals(3, ring.size()); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/test/java/hivemall/utils/hadoop/JsonSerdeUtilsTest.java ---------------------------------------------------------------------- diff --git 
a/core/src/test/java/hivemall/utils/hadoop/JsonSerdeUtilsTest.java b/core/src/test/java/hivemall/utils/hadoop/JsonSerdeUtilsTest.java new file mode 100644 index 0000000..5b971e7 --- /dev/null +++ b/core/src/test/java/hivemall/utils/hadoop/JsonSerdeUtilsTest.java @@ -0,0 +1,365 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.utils.hadoop; + +import java.math.BigDecimal; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.annotation.Nonnull; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.Text; +import org.apache.hive.hcatalog.data.DataType; +import org.apache.hive.hcatalog.data.DefaultHCatRecord; +import org.apache.hive.hcatalog.data.HCatRecordObjectInspector; +import org.apache.hive.hcatalog.data.HCatRecordObjectInspectorFactory; +import org.junit.Assert; +import org.junit.Test; + +import com.google.common.collect.ImmutableMap; + +public class JsonSerdeUtilsTest { + + /** + * This test tests that our json deserialization is not too strict, as per HIVE-6166 + * + * i.e, if our schema is "s:struct<a:int,b:string>,k:int", and we pass in data that looks like : + * + * <pre> + * { + * "x" : "abc" , + * "t" : { + * "a" : "1", + * "b" : "2", + * "c" : [ + * { "x" : 2 , "y" : 3 } , + * { "x" : 3 , "y" : 2 } + * ] + * } , + * "s" : { + * "a" : 2 , + * "b" : "blah", + * "c": "woo" + * } + * } + * </pre> + * + * Then it should still work, and ignore the "x" and "t" field and "c" subfield of "s", and it + * should read k as null. 
+ */ + @Test + public void testLooseJsonReadability() throws Exception { + List<String> columnNames = Arrays.asList("s,k".split(",")); + List<TypeInfo> columnTypes = + TypeInfoUtils.getTypeInfosFromTypeString("struct<a:int,b:string>,int"); + + Text jsonText1 = new Text("{ \"x\" : \"abc\" , " + + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ," + + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } }"); + + Text jsonText2 = new Text("{ \"x\" : \"abc\" , " + + " \"t\" : { \"a\":\"1\", \"b\":\"2\", \"c\":[ { \"x\":2 , \"y\":3 } , { \"x\":3 , \"y\":2 }] } ," + + "\"s\" : { \"a\" : 2 , \"b\" : \"blah\", \"c\": \"woo\" } , " + "\"k\" : 113 " + + "}"); + + List<Object> expected1 = Arrays.<Object>asList(Arrays.asList(2, "blah"), null); + List<Object> expected2 = Arrays.<Object>asList(Arrays.asList(2, "blah"), 113); + List<Object> result1 = JsonSerdeUtils.deserialize(jsonText1, columnNames, columnTypes); + List<Object> result2 = JsonSerdeUtils.deserialize(jsonText2, columnNames, columnTypes); + + Assert.assertEquals(expected1, result1); + Assert.assertEquals(expected2, result2); + } + + @Test + public void testMapValues() throws SerDeException { + List<String> columnNames = Arrays.asList("a,b".split(",")); + List<TypeInfo> columnTypes = + TypeInfoUtils.getTypeInfosFromTypeString("array<string>,map<string,int>"); + + Text text1 = new Text("{ \"a\":[\"aaa\"],\"b\":{\"bbb\":1}} "); + Text text2 = new Text("{\"a\":[\"yyy\"],\"b\":{\"zzz\":123}}"); + Text text3 = new Text("{\"a\":[\"a\"],\"b\":{\"x\":11, \"y\": 22, \"z\": null}}"); + + List<Object> expected1 = Arrays.<Object>asList(Arrays.<String>asList("aaa"), + createHashMapStringInteger("bbb", 1)); + List<Object> expected2 = Arrays.<Object>asList(Arrays.<String>asList("yyy"), + createHashMapStringInteger("zzz", 123)); + List<Object> expected3 = Arrays.<Object>asList(Arrays.<String>asList("a"), + createHashMapStringInteger("x", 11, "y", 22, "z", null)); + + List<Object> 
result1 = JsonSerdeUtils.deserialize(text1, columnNames, columnTypes); + List<Object> result2 = JsonSerdeUtils.deserialize(text2, columnNames, columnTypes); + List<Object> result3 = JsonSerdeUtils.deserialize(text3, columnNames, columnTypes); + + Assert.assertEquals(expected1, result1); + Assert.assertEquals(expected2, result2); + Assert.assertEquals(expected3, result3); + } + + private static HashMap<String, Integer> createHashMapStringInteger(Object... vals) { + Assert.assertTrue(vals.length % 2 == 0); + HashMap<String, Integer> retval = new HashMap<String, Integer>(); + for (int idx = 0; idx < vals.length; idx += 2) { + retval.put((String) vals[idx], (Integer) vals[idx + 1]); + } + return retval; + } + + @Test + public void testRW() throws Exception { + List<Object> rlist = new ArrayList<Object>(13); + { + rlist.add(new Byte("123")); + rlist.add(new Short("456")); + rlist.add(new Integer(789)); + rlist.add(new Long(1000L)); + rlist.add(new Double(5.3D)); + rlist.add(new Float(2.39F)); + rlist.add(new String("hcat\nand\nhadoop")); + rlist.add(null); + + List<Object> innerStruct = new ArrayList<Object>(2); + innerStruct.add(new String("abc")); + innerStruct.add(new String("def")); + rlist.add(innerStruct); + + List<Integer> innerList = new ArrayList<Integer>(); + innerList.add(314); + innerList.add(007); + rlist.add(innerList); + + Map<Short, String> map = new HashMap<Short, String>(3); + map.put(new Short("2"), "hcat is cool"); + map.put(new Short("3"), "is it?"); + map.put(new Short("4"), "or is it not?"); + rlist.add(map); + + rlist.add(new Boolean(true)); + + List<Object> c1 = new ArrayList<Object>(); + List<Object> c1_1 = new ArrayList<Object>(); + c1_1.add(new Integer(12)); + List<Object> i2 = new ArrayList<Object>(); + List<Integer> ii1 = new ArrayList<Integer>(); + ii1.add(new Integer(13)); + ii1.add(new Integer(14)); + i2.add(ii1); + Map<String, List<?>> ii2 = new HashMap<String, List<?>>(); + List<Integer> iii1 = new ArrayList<Integer>(); + iii1.add(new 
Integer(15)); + ii2.put("phew", iii1); + i2.add(ii2); + c1_1.add(i2); + c1.add(c1_1); + rlist.add(c1); + rlist.add(HiveDecimal.create(new BigDecimal("123.45")));//prec 5, scale 2 + rlist.add(new HiveChar("hive\nchar", 10)); + rlist.add(new HiveVarchar("hive\nvarchar", 20)); + rlist.add(Date.valueOf("2014-01-07")); + rlist.add(new Timestamp(System.currentTimeMillis())); + rlist.add("hive\nbinary".getBytes("UTF-8")); + } + + DefaultHCatRecord r = new DefaultHCatRecord(rlist); + + List<String> columnNames = + Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(",")); + List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString( + "tinyint,smallint,int,bigint,double,float,string,string," + + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean," + + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>," + + "decimal(5,2),char(10),varchar(20),date,timestamp,binary"); + + StructTypeInfo rowTypeInfo = + (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); + HCatRecordObjectInspector objInspector = + HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); + + Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames); + List<Object> deserialized = + JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes); + + assertRecordEquals(rlist, deserialized); + } + + @Test + public void testRWNull() throws Exception { + List<Object> nlist = new ArrayList<Object>(13); + { + nlist.add(null); // tinyint + nlist.add(null); // smallint + nlist.add(null); // int + nlist.add(null); // bigint + nlist.add(null); // double + nlist.add(null); // float + nlist.add(null); // string + nlist.add(null); // string + nlist.add(null); // struct + nlist.add(null); // array + nlist.add(null); // map + nlist.add(null); // bool + nlist.add(null); // complex + nlist.add(null); //decimal(5,2) + nlist.add(null); //char(10) + nlist.add(null); //varchar(20) + 
nlist.add(null); //date + nlist.add(null); //timestamp + nlist.add(null); //binary + } + + DefaultHCatRecord r = new DefaultHCatRecord(nlist); + + List<String> columnNames = + Arrays.asList("ti,si,i,bi,d,f,s,n,r,l,m,b,c1,bd,hc,hvc,dt,ts,bin".split(",")); + List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString( + "tinyint,smallint,int,bigint,double,float,string,string," + + "struct<a:string,b:string>,array<int>,map<smallint,string>,boolean," + + "array<struct<i1:int,i2:struct<ii1:array<int>,ii2:map<string,struct<iii1:int>>>>>," + + "decimal(5,2),char(10),varchar(20),date,timestamp,binary"); + + StructTypeInfo rowTypeInfo = + (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); + HCatRecordObjectInspector objInspector = + HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo); + + Text serialized = JsonSerdeUtils.serialize(r, objInspector, columnNames); + List<Object> deserialized = + JsonSerdeUtils.deserialize(serialized, columnNames, columnTypes); + + assertRecordEquals(nlist, deserialized); + } + + @Test + public void testStructWithoutColumnNames() throws Exception { + Text json1 = new Text("{ \"person\" : { \"name\" : \"makoto\" , \"age\" : 37 } }"); + TypeInfo type1 = TypeInfoUtils.getTypeInfoFromTypeString("struct<name:string,age:int>"); + List<Object> expected1 = Arrays.<Object>asList("makoto", 37); + + List<Object> deserialized1 = + JsonSerdeUtils.deserialize(json1, Arrays.asList("person"), Arrays.asList(type1)); + + assertRecordEquals(expected1, deserialized1); + } + + @Test + public void testTopLevelArray() throws Exception { + List<String> expected1 = Arrays.asList("Taro", "Tanaka"); + Text json1 = new Text("[\"Taro\",\"Tanaka\"]"); + TypeInfo type1 = TypeInfoUtils.getTypeInfoFromTypeString("array<string>"); + + List<Object> deserialized1 = JsonSerdeUtils.deserialize(json1, type1); + assertRecordEquals(expected1, deserialized1); + Text serialized1 = JsonSerdeUtils.serialize(deserialized1, + 
HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type1)); + Assert.assertEquals(json1, serialized1); + + List<Double> expected2 = Arrays.asList(1.1d, 2.2d, 3.3d); + Text json2 = new Text("[1.1,2.2,3.3]"); + TypeInfo type2 = TypeInfoUtils.getTypeInfoFromTypeString("array<double>"); + + List<Object> deserialized2 = JsonSerdeUtils.deserialize(json2, type2); + assertRecordEquals(expected2, deserialized2); + Text serialized2 = JsonSerdeUtils.serialize(deserialized2, + HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type2)); + Assert.assertEquals(json2, serialized2); + } + + @Test + public void testTopLevelNestedArray() throws Exception { + List<Map<String, Integer>> expected1 = Arrays.<Map<String, Integer>>asList( + ImmutableMap.of("one", 1), ImmutableMap.of("two", 2)); + Text json1 = new Text("[{\"one\":1},{\"two\":2}]"); + TypeInfo type1 = TypeInfoUtils.getTypeInfoFromTypeString("array<map<string,int>>"); + + List<Object> deserialized1 = JsonSerdeUtils.deserialize(json1, type1); + assertRecordEquals(expected1, deserialized1); + Text serialized1 = JsonSerdeUtils.serialize(deserialized1, + HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type1)); + Assert.assertEquals(json1, serialized1); + } + + @Test + public void testTopLevelPrimitive() throws Exception { + Double expected1 = Double.valueOf(3.3); + Text json1 = new Text("3.3"); + TypeInfo type1 = TypeInfoUtils.getTypeInfoFromTypeString("double"); + + Object deserialized1 = JsonSerdeUtils.deserialize(json1, type1); + Assert.assertEquals(expected1, deserialized1); + Text serialized1 = JsonSerdeUtils.serialize(deserialized1, + HCatRecordObjectInspectorFactory.getStandardObjectInspectorFromTypeInfo(type1)); + Assert.assertEquals(json1, serialized1); + + Boolean expected2 = Boolean.FALSE; + Text json2 = new Text("false"); + + Boolean deserialized2 = JsonSerdeUtils.deserialize(json2); + Assert.assertEquals(expected2, deserialized2); + Text serialized2 = 
JsonSerdeUtils.serialize(deserialized2, + PrimitiveObjectInspectorFactory.javaBooleanObjectInspector); + Assert.assertEquals(json2, serialized2); + } + + + private static void assertRecordEquals(@Nonnull final List<?> first, + @Nonnull final List<?> second) { + int mySz = first.size(); + int urSz = second.size(); + if (mySz != urSz) { + throw new RuntimeException( + "#expected != #actual. #expected=" + mySz + ", #actual=" + urSz); + } else { + for (int i = 0; i < first.size(); i++) { + int c = DataType.compare(first.get(i), second.get(i)); + if (c != 0) { + String msg = "first.get(" + i + "}='" + first.get(i) + "' second.get(" + i + + ")='" + second.get(i) + "' compared as " + c + "\n" + "Types 1st/2nd=" + + DataType.findType(first.get(i)) + "/" + + DataType.findType(second.get(i)) + '\n' + "first='" + first.get(i) + + "' second='" + second.get(i) + "'"; + if (first.get(i) instanceof Date) { + msg += "\n((Date)first.get(i)).getTime()=" + + ((Date) first.get(i)).getTime(); + } + if (second.get(i) instanceof Date) { + msg += "\n((Date)second.get(i)).getTime()=" + + ((Date) second.get(i)).getTime(); + } + throw new RuntimeException(msg); + } + } + } + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/test/java/hivemall/utils/stats/MovingAverageTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/utils/stats/MovingAverageTest.java b/core/src/test/java/hivemall/utils/stats/MovingAverageTest.java new file mode 100644 index 0000000..accb2ca --- /dev/null +++ b/core/src/test/java/hivemall/utils/stats/MovingAverageTest.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.stats; + +import hivemall.utils.stats.MovingAverage; + +import org.junit.Assert; +import org.junit.Test; + +public class MovingAverageTest { + + @Test + public void testAdd() { + MovingAverage movingAvg = new MovingAverage(3); + Assert.assertEquals(0.d, movingAvg.get(), 0.d); + Assert.assertEquals(1.d, movingAvg.add(1.d), 0.d); + Assert.assertEquals(1.5d, movingAvg.add(2.d), 0.d); // (1+2)/2 = 1.5 + Assert.assertEquals(2.d, movingAvg.add(3.d), 0.d); // (1+2+3)/3 = 2 + Assert.assertEquals(3.d, movingAvg.add(4.d), 0.d); // (2+3+4)/3 = 3 + Assert.assertEquals(4.d, movingAvg.add(5.d), 0.d); // (3+4+5)/3 = 4 + Assert.assertEquals(5.d, movingAvg.add(6.d), 0.d); // (4+5+6)/3 = 5 + Assert.assertEquals(6.d, movingAvg.add(7.d), 0.d); // (5+6+7)/3 = 6 + } + + @Test(expected = IllegalArgumentException.class) + public void testNaN() { + MovingAverage movingAvg = new MovingAverage(3); + movingAvg.add(Double.NaN); + } + + @Test(expected = IllegalArgumentException.class) + public void testInfinity() { + MovingAverage movingAvg = new MovingAverage(3); + movingAvg.add(Double.POSITIVE_INFINITY); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/core/src/test/java/hivemall/utils/stats/OnlineVarianceTest.java ---------------------------------------------------------------------- diff --git 
a/core/src/test/java/hivemall/utils/stats/OnlineVarianceTest.java b/core/src/test/java/hivemall/utils/stats/OnlineVarianceTest.java new file mode 100644 index 0000000..7c372b5 --- /dev/null +++ b/core/src/test/java/hivemall/utils/stats/OnlineVarianceTest.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package hivemall.utils.stats; + +import java.util.Collections; +import java.util.ArrayList; +import java.util.Random; + +import static org.junit.Assert.assertEquals; + +import hivemall.utils.stats.OnlineVariance; + +import org.junit.Test; + +public class OnlineVarianceTest { + + @Test + public void testSimple() { + OnlineVariance onlineVariance = new OnlineVariance(); + + long n = 0L; + double sum = 0.d; + double sumOfSquare = 0.d; + + assertEquals(0L, onlineVariance.numSamples()); + assertEquals(0.d, onlineVariance.mean(), 1e-5f); + assertEquals(0.d, onlineVariance.variance(), 1e-5f); + assertEquals(0.d, onlineVariance.stddev(), 1e-5f); + + Random rnd = new Random(); + ArrayList<Double> dArrayList = new ArrayList<Double>(); + + for (int i = 0; i < 10; i++) { + double x = rnd.nextDouble(); + dArrayList.add(x); + onlineVariance.handle(x); + + n++; + sum += x; + sumOfSquare += x * x; + + double mean = n > 0 ? (sum / n) : 0.d; + double sampleVariance = n > 0 ? ((sumOfSquare / n) - mean * mean) : 0.d; + double unbiasedVariance = n > 1 ? (sampleVariance * n / (n - 1)) : 0.d; + double stddev = Math.sqrt(unbiasedVariance); + + assertEquals(n, onlineVariance.numSamples()); + assertEquals(mean, onlineVariance.mean(), 1e-5f); + assertEquals(unbiasedVariance, onlineVariance.variance(), 1e-5f); + assertEquals(stddev, onlineVariance.stddev(), 1e-5f); + } + + Collections.shuffle(dArrayList); + + for (Double x : dArrayList) { + onlineVariance.unhandle(x.doubleValue()); + + n--; + sum -= x; + sumOfSquare -= x * x; + + double mean = n > 0 ? (sum / n) : 0.d; + double sampleVariance = n > 0 ? ((sumOfSquare / n) - mean * mean) : 0.d; + double unbiasedVariance = n > 1 ? 
(sampleVariance * n / (n - 1)) : 0.d; + double stddev = Math.sqrt(unbiasedVariance); + + assertEquals(n, onlineVariance.numSamples()); + assertEquals(mean, onlineVariance.mean(), 1e-5f); + assertEquals(unbiasedVariance, onlineVariance.variance(), 1e-5f); + assertEquals(stddev, onlineVariance.stddev(), 1e-5f); + } + + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 250098e..1b5c8e6 100644 --- a/pom.xml +++ b/pom.xml @@ -364,6 +364,12 @@ </exclusions> </dependency> <dependency> + <groupId>org.apache.hive.hcatalog</groupId> + <artifactId>hive-hcatalog-core</artifactId> + <version>${hive.version}</version> + <scope>provided</scope> + </dependency> + <dependency> <groupId>commons-cli</groupId> <artifactId>commons-cli</artifactId> <version>1.2</version> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/resources/ddl/define-all-as-permanent.hive ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all-as-permanent.hive b/resources/ddl/define-all-as-permanent.hive index ed9f22f..e7da8e3 100644 --- a/resources/ddl/define-all-as-permanent.hive +++ b/resources/ddl/define-all-as-permanent.hive @@ -413,8 +413,12 @@ CREATE FUNCTION array_concat as 'hivemall.tools.array.ArrayConcatUDF' USING JAR DROP FUNCTION IF EXISTS concat_array; CREATE FUNCTION concat_array as 'hivemall.tools.array.ArrayConcatUDF' USING JAR '${hivemall_jar}'; +-- alias for backward compatibility DROP FUNCTION IF EXISTS subarray; -CREATE FUNCTION subarray as 'hivemall.tools.array.SubarrayUDF' USING JAR '${hivemall_jar}'; +CREATE FUNCTION subarray as 'hivemall.tools.array.ArraySliceUDF' USING JAR '${hivemall_jar}'; + +DROP FUNCTION IF EXISTS array_slice; +CREATE FUNCTION array_slice as 'hivemall.tools.array.ArraySliceUDF' USING JAR '${hivemall_jar}'; DROP FUNCTION IF EXISTS array_avg; CREATE 
FUNCTION array_avg as 'hivemall.tools.array.ArrayAvgGenericUDAF' USING JAR '${hivemall_jar}'; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/resources/ddl/define-all.hive ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all.hive b/resources/ddl/define-all.hive index 0267a6d..9228ce9 100644 --- a/resources/ddl/define-all.hive +++ b/resources/ddl/define-all.hive @@ -405,8 +405,12 @@ create temporary function array_concat as 'hivemall.tools.array.ArrayConcatUDF'; drop temporary function if exists concat_array; create temporary function concat_array as 'hivemall.tools.array.ArrayConcatUDF'; +-- alias for backward compatibility drop temporary function if exists subarray; -create temporary function subarray as 'hivemall.tools.array.SubarrayUDF'; +create temporary function subarray as 'hivemall.tools.array.ArraySliceUDF'; + +drop temporary function if exists array_slice; +create temporary function array_slice as 'hivemall.tools.array.ArraySliceUDF'; drop temporary function if exists array_avg; create temporary function array_avg as 'hivemall.tools.array.ArrayAvgGenericUDAF'; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/resources/ddl/define-all.spark ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all.spark b/resources/ddl/define-all.spark index cf4a15c..3764ca2 100644 --- a/resources/ddl/define-all.spark +++ b/resources/ddl/define-all.spark @@ -405,7 +405,10 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS array_concat") sqlContext.sql("CREATE TEMPORARY FUNCTION array_concat AS 'hivemall.tools.array.ArrayConcatUDF'") sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS subarray") -sqlContext.sql("CREATE TEMPORARY FUNCTION subarray AS 'hivemall.tools.array.SubarrayUDF'") +sqlContext.sql("CREATE TEMPORARY FUNCTION subarray AS 'hivemall.tools.array.ArraySliceUDF'") + +sqlContext.sql("DROP TEMPORARY FUNCTION IF 
EXISTS array_slice") +sqlContext.sql("CREATE TEMPORARY FUNCTION array_slice AS 'hivemall.tools.array.ArraySliceUDF'") sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS array_avg") sqlContext.sql("CREATE TEMPORARY FUNCTION array_avg AS 'hivemall.tools.array.ArrayAvgGenericUDAF'") http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/resources/ddl/define-udfs.td.hql ---------------------------------------------------------------------- diff --git a/resources/ddl/define-udfs.td.hql b/resources/ddl/define-udfs.td.hql index 6a7b75b..ec7ecf4 100644 --- a/resources/ddl/define-udfs.td.hql +++ b/resources/ddl/define-udfs.td.hql @@ -99,7 +99,6 @@ create temporary function sort_and_uniq_array as 'hivemall.tools.array.SortAndUn create temporary function subarray_endwith as 'hivemall.tools.array.SubarrayEndWithUDF'; create temporary function subarray_startwith as 'hivemall.tools.array.SubarrayStartWithUDF'; create temporary function array_concat as 'hivemall.tools.array.ArrayConcatUDF'; -create temporary function subarray as 'hivemall.tools.array.SubarrayUDF'; create temporary function array_avg as 'hivemall.tools.array.ArrayAvgGenericUDAF'; create temporary function array_sum as 'hivemall.tools.array.ArraySumUDAF'; create temporary function to_string_array as 'hivemall.tools.array.ToStringArrayUDF'; @@ -185,6 +184,7 @@ create temporary function train_slim as 'hivemall.recommend.SlimUDTF'; create temporary function hitrate as 'hivemall.evaluation.HitRateUDAF'; create temporary function word_ngrams as 'hivemall.tools.text.WordNgramsUDF'; create temporary function approx_count_distinct as 'hivemall.sketch.hll.ApproxCountDistinctUDAF'; +create temporary function array_slice as 'hivemall.tools.array.ArraySliceUDF'; -- NLP features create temporary function tokenize_ja as 'hivemall.nlp.tokenizer.KuromojiUDF'; @@ -197,6 +197,7 @@ create temporary function arow_regress as 'hivemall.regression.AROWRegressionUDT create temporary function addBias as 
'hivemall.ftvec.AddBiasUDF'; create temporary function tree_predict_v1 as 'hivemall.smile.tools.TreePredictUDFv1'; create temporary function add_field_indicies as 'hivemall.ftvec.trans.AddFieldIndicesUDF'; +create temporary function subarray as 'hivemall.tools.array.ArraySliceUDF'; -- alias for TD create temporary function approx_distinct as 'hivemall.sketch.hll.ApproxCountDistinctUDAF'; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/1e1b77ea/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala ---------------------------------------------------------------------- diff --git a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala index 90a21d7..b02ef02 100644 --- a/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala +++ b/spark/spark-2.2/src/main/scala/org/apache/spark/sql/hive/HivemallOps.scala @@ -1937,18 +1937,32 @@ object HivemallOps { } /** - * @see [[hivemall.tools.array.SubarrayUDF]] + * Alias of array_slice for a backward compatibility. + * + * @see [[hivemall.tools.array.ArraySliceUDF]] * @group tools.array */ def subarray(original: Column, fromIndex: Column, toIndex: Column): Column = withExpr { - planHiveUDF( - "hivemall.tools.array.SubarrayUDF", + planHiveGenericUDF( + "hivemall.tools.array.ArraySliceUDF", "subarray", original :: fromIndex :: toIndex :: Nil ) } /** + * @see [[hivemall.tools.array.ArraySliceUDF]] + * @group tools.array + */ + def array_slice(original: Column, fromIndex: Column, toIndex: Column): Column = withExpr { + planHiveGenericUDF( + "hivemall.tools.array.ArraySliceUDF", + "array_slice", + original :: fromIndex :: toIndex :: Nil + ) + } + + /** * @see [[hivemall.tools.array.ToStringArrayUDF]] * @group tools.array */
