Repository: incubator-hivemall Updated Branches: refs/heads/master 3410ba642 -> 380478916 (forced update)
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/core/src/test/java/hivemall/smile/tools/TreePredictUDFv1Test.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/smile/tools/TreePredictUDFv1Test.java b/core/src/test/java/hivemall/smile/tools/TreePredictUDFv1Test.java index bf2ac11..f885041 100644 --- a/core/src/test/java/hivemall/smile/tools/TreePredictUDFv1Test.java +++ b/core/src/test/java/hivemall/smile/tools/TreePredictUDFv1Test.java @@ -52,6 +52,7 @@ import smile.validation.CrossValidation; import smile.validation.LOOCV; import smile.validation.RMSE; +@SuppressWarnings("deprecation") public class TreePredictUDFv1Test { private static final boolean DEBUG = false; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/core/src/test/java/hivemall/utils/collections/maps/Int2FloatOpenHashMapTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/utils/collections/maps/Int2FloatOpenHashMapTest.java b/core/src/test/java/hivemall/utils/collections/maps/Int2FloatOpenHashMapTest.java deleted file mode 100644 index 6a2ff96..0000000 --- a/core/src/test/java/hivemall/utils/collections/maps/Int2FloatOpenHashMapTest.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections.maps; - -import hivemall.utils.collections.maps.Int2FloatOpenHashTable; - -import org.junit.Assert; -import org.junit.Test; - -public class Int2FloatOpenHashMapTest { - - @Test - public void testSize() { - Int2FloatOpenHashTable map = new Int2FloatOpenHashTable(16384); - map.put(1, 3.f); - Assert.assertEquals(3.f, map.get(1), 0.d); - map.put(1, 5.f); - Assert.assertEquals(5.f, map.get(1), 0.d); - Assert.assertEquals(1, map.size()); - } - - @Test - public void testDefaultReturnValue() { - Int2FloatOpenHashTable map = new Int2FloatOpenHashTable(16384); - Assert.assertEquals(0, map.size()); - Assert.assertEquals(-1.f, map.get(1), 0.d); - float ret = Float.MIN_VALUE; - map.defaultReturnValue(ret); - Assert.assertEquals(ret, map.get(1), 0.d); - } - - @Test - public void testPutAndGet() { - Int2FloatOpenHashTable map = new Int2FloatOpenHashTable(16384); - final int numEntries = 1000000; - for (int i = 0; i < numEntries; i++) { - Assert.assertEquals(-1.f, map.put(i, Float.valueOf(i + 0.1f)), 0.d); - } - Assert.assertEquals(numEntries, map.size()); - for (int i = 0; i < numEntries; i++) { - Float v = map.get(i); - Assert.assertEquals(i + 0.1f, v.floatValue(), 0.d); - } - } - - @Test - public void testIterator() { - Int2FloatOpenHashTable map = new Int2FloatOpenHashTable(1000); - Int2FloatOpenHashTable.IMapIterator itor = map.entries(); - Assert.assertFalse(itor.hasNext()); - - final int numEntries = 1000000; - for (int i = 0; i < numEntries; i++) { - Assert.assertEquals(-1.f, map.put(i, Float.valueOf(i + 0.1f)), 0.d); - } - Assert.assertEquals(numEntries, map.size()); - - itor = map.entries(); - Assert.assertTrue(itor.hasNext()); - while (itor.hasNext()) { - Assert.assertFalse(itor.next() == -1); - int k = itor.getKey(); - Float v = itor.getValue(); - Assert.assertEquals(k + 0.1f, v.floatValue(), 0.d); - } - Assert.assertEquals(-1, itor.next()); - } - - @Test - public void testIterator2() { - Int2FloatOpenHashTable map = new Int2FloatOpenHashTable(100); - map.put(33, 3.16f); - - Int2FloatOpenHashTable.IMapIterator itor = map.entries(); - Assert.assertTrue(itor.hasNext()); - Assert.assertNotEquals(-1, itor.next()); - Assert.assertEquals(33, itor.getKey()); - Assert.assertEquals(3.16f, itor.getValue(), 0.d); - Assert.assertEquals(-1, itor.next()); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/core/src/test/java/hivemall/utils/collections/maps/Int2FloatOpenHashTableTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/utils/collections/maps/Int2FloatOpenHashTableTest.java b/core/src/test/java/hivemall/utils/collections/maps/Int2FloatOpenHashTableTest.java new file mode 100644 index 0000000..53814ac --- /dev/null +++ b/core/src/test/java/hivemall/utils/collections/maps/Int2FloatOpenHashTableTest.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.collections.maps; + +import hivemall.utils.collections.maps.Int2FloatOpenHashTable; + +import org.junit.Assert; +import org.junit.Test; + +public class Int2FloatOpenHashTableTest { + + @Test + public void testSize() { + Int2FloatOpenHashTable map = new Int2FloatOpenHashTable(16384); + map.put(1, 3.f); + Assert.assertEquals(3.f, map.get(1), 0.d); + map.put(1, 5.f); + Assert.assertEquals(5.f, map.get(1), 0.d); + Assert.assertEquals(1, map.size()); + } + + @Test + public void testDefaultReturnValue() { + Int2FloatOpenHashTable map = new Int2FloatOpenHashTable(16384); + Assert.assertEquals(0, map.size()); + Assert.assertEquals(-1.f, map.get(1), 0.d); + float ret = Float.MIN_VALUE; + map.defaultReturnValue(ret); + Assert.assertEquals(ret, map.get(1), 0.d); + } + + @Test + public void testPutAndGet() { + Int2FloatOpenHashTable map = new Int2FloatOpenHashTable(16384); + final int numEntries = 1000000; + for (int i = 0; i < numEntries; i++) { + Assert.assertEquals(-1.f, map.put(i, Float.valueOf(i + 0.1f)), 0.d); + } + Assert.assertEquals(numEntries, map.size()); + for (int i = 0; i < numEntries; i++) { + Float v = map.get(i); + Assert.assertEquals(i + 0.1f, v.floatValue(), 0.d); + } + } + + @Test + public void testIterator() { + Int2FloatOpenHashTable map = new Int2FloatOpenHashTable(1000); + Int2FloatOpenHashTable.IMapIterator itor = map.entries(); + Assert.assertFalse(itor.hasNext()); + + final int numEntries = 1000000; + for (int i = 0; i < numEntries; i++) { + Assert.assertEquals(-1.f, map.put(i, Float.valueOf(i + 0.1f)), 0.d); + } + Assert.assertEquals(numEntries, map.size()); + + itor = map.entries(); + Assert.assertTrue(itor.hasNext()); + while (itor.hasNext()) { + Assert.assertFalse(itor.next() == -1); + int k = itor.getKey(); + Float v = itor.getValue(); + Assert.assertEquals(k + 0.1f, v.floatValue(), 0.d); + } + Assert.assertEquals(-1, itor.next()); + } + + @Test + public void testIterator2() { + Int2FloatOpenHashTable map = new Int2FloatOpenHashTable(100); + map.put(33, 3.16f); + + Int2FloatOpenHashTable.IMapIterator itor = map.entries(); + Assert.assertTrue(itor.hasNext()); + Assert.assertNotEquals(-1, itor.next()); + Assert.assertEquals(33, itor.getKey()); + Assert.assertEquals(3.16f, itor.getValue(), 0.d); + Assert.assertEquals(-1, itor.next()); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/core/src/test/java/hivemall/utils/collections/maps/Int2LongOpenHashMapTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/utils/collections/maps/Int2LongOpenHashMapTest.java b/core/src/test/java/hivemall/utils/collections/maps/Int2LongOpenHashMapTest.java index 7951b0b..ee36a83 100644 --- a/core/src/test/java/hivemall/utils/collections/maps/Int2LongOpenHashMapTest.java +++ b/core/src/test/java/hivemall/utils/collections/maps/Int2LongOpenHashMapTest.java @@ -18,11 +18,6 @@ */ package hivemall.utils.collections.maps; -import hivemall.utils.collections.maps.Int2LongOpenHashTable; -import hivemall.utils.lang.ObjectUtils; - -import java.io.IOException; - import org.junit.Assert; import org.junit.Test; @@ -30,7 +25,7 @@ public class Int2LongOpenHashMapTest { @Test public void testSize() { - Int2LongOpenHashTable map = new Int2LongOpenHashTable(16384); + Int2LongOpenHashMap map = new Int2LongOpenHashMap(16384); map.put(1, 3L); Assert.assertEquals(3L, map.get(1)); map.put(1, 5L); @@ -40,67 +35,72 @@ public class Int2LongOpenHashMapTest { @Test public void testDefaultReturnValue() { - Int2LongOpenHashTable map = new Int2LongOpenHashTable(16384); + Int2LongOpenHashMap map = new Int2LongOpenHashMap(16384); Assert.assertEquals(0, map.size()); - Assert.assertEquals(-1L, map.get(1)); - long ret = Long.MIN_VALUE; - map.defaultReturnValue(ret); - Assert.assertEquals(ret, map.get(1)); + Assert.assertEquals(0L, map.get(1)); + Assert.assertEquals(Long.MIN_VALUE, map.get(1, Long.MIN_VALUE)); } @Test public void testPutAndGet() { - Int2LongOpenHashTable map = new Int2LongOpenHashTable(16384); + Int2LongOpenHashMap map = new Int2LongOpenHashMap(16384); final int numEntries = 1000000; for (int i = 0; i < numEntries; i++) { - Assert.assertEquals(-1L, map.put(i, i)); + Assert.assertEquals(0L, map.put(i, i)); + Assert.assertEquals(0L, map.put(-i, -i)); } - Assert.assertEquals(numEntries, map.size()); + Assert.assertEquals(numEntries * 2 - 1, map.size()); for (int i = 0; i < numEntries; i++) { - long v = map.get(i); - Assert.assertEquals(i, v); + Assert.assertEquals(i, map.get(i)); + Assert.assertEquals(-i, map.get(-i)); } } @Test - public void testSerde() throws IOException, ClassNotFoundException { - Int2LongOpenHashTable map = new Int2LongOpenHashTable(16384); + public void testPutRemoveGet() { + Int2LongOpenHashMap map = new Int2LongOpenHashMap(16384); final int numEntries = 1000000; for (int i = 0; i < numEntries; i++) { - Assert.assertEquals(-1L, map.put(i, i)); + Assert.assertEquals(0L, map.put(i, i)); + Assert.assertEquals(0L, map.put(-i, -i)); + if (i % 2 == 0) { + Assert.assertEquals(i, map.remove(i, -1)); + } else { + Assert.assertEquals(i, map.put(i, i)); + } } - - byte[] b = ObjectUtils.toCompressedBytes(map); - map = new Int2LongOpenHashTable(16384); - ObjectUtils.readCompressedObject(b, map); - - Assert.assertEquals(numEntries, map.size()); + Assert.assertEquals(numEntries + (numEntries / 2) - 1, map.size()); for (int i = 0; i < numEntries; i++) { - long v = map.get(i); - Assert.assertEquals(i, v); + if (i % 2 == 0) { + Assert.assertFalse(map.containsKey(i)); + } else { + Assert.assertEquals(i, map.get(i)); + } + Assert.assertEquals(-i, map.get(-i)); } } @Test public void testIterator() { - Int2LongOpenHashTable map = new Int2LongOpenHashTable(1000); - Int2LongOpenHashTable.IMapIterator itor = map.entries(); + Int2LongOpenHashMap map = new Int2LongOpenHashMap(1000); + Int2LongOpenHashMap.MapIterator itor = map.entries(); Assert.assertFalse(itor.hasNext()); final int numEntries = 1000000; for (int i = 0; i < numEntries; i++) { - Assert.assertEquals(-1L, map.put(i, i)); + Assert.assertEquals(0L, map.put(i, i)); + Assert.assertEquals(0L, map.put(-i, -i)); } - Assert.assertEquals(numEntries, map.size()); + Assert.assertEquals(numEntries * 2 - 1, map.size()); itor = map.entries(); Assert.assertTrue(itor.hasNext()); while (itor.hasNext()) { - Assert.assertFalse(itor.next() == -1); + Assert.assertTrue(itor.next()); int k = itor.getKey(); long v = itor.getValue(); Assert.assertEquals(k, v); } - Assert.assertEquals(-1, itor.next()); + Assert.assertFalse(itor.next()); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/core/src/test/java/hivemall/utils/collections/maps/Int2LongOpenHashTableTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/utils/collections/maps/Int2LongOpenHashTableTest.java b/core/src/test/java/hivemall/utils/collections/maps/Int2LongOpenHashTableTest.java new file mode 100644 index 0000000..c2ce132 --- /dev/null +++ b/core/src/test/java/hivemall/utils/collections/maps/Int2LongOpenHashTableTest.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.collections.maps; + +import hivemall.utils.lang.ObjectUtils; + +import java.io.IOException; + +import org.junit.Assert; +import org.junit.Test; + +public class Int2LongOpenHashTableTest { + + @Test + public void testSize() { + Int2LongOpenHashTable map = new Int2LongOpenHashTable(16384); + map.put(1, 3L); + Assert.assertEquals(3L, map.get(1)); + map.put(1, 5L); + Assert.assertEquals(5L, map.get(1)); + Assert.assertEquals(1, map.size()); + } + + @Test + public void testDefaultReturnValue() { + Int2LongOpenHashTable map = new Int2LongOpenHashTable(16384); + Assert.assertEquals(0, map.size()); + Assert.assertEquals(-1L, map.get(1)); + long ret = Long.MIN_VALUE; + map.defaultReturnValue(ret); + Assert.assertEquals(ret, map.get(1)); + } + + @Test + public void testPutAndGet() { + Int2LongOpenHashTable map = new Int2LongOpenHashTable(16384); + final int numEntries = 1000000; + for (int i = 0; i < numEntries; i++) { + Assert.assertEquals(-1L, map.put(i, i)); + } + Assert.assertEquals(numEntries, map.size()); + for (int i = 0; i < numEntries; i++) { + long v = map.get(i); + Assert.assertEquals(i, v); + } + } + + @Test + public void testPutRemoveGet() { + Int2LongOpenHashTable map = new Int2LongOpenHashTable(16384); + map.defaultReturnValue(0L); + final int numEntries = 1000000; + for (int i = 0; i < numEntries; i++) { + Assert.assertEquals(0L, map.put(i, i)); + Assert.assertEquals(0L, map.put(-i, -i)); + if (i % 2 == 0) { + Assert.assertEquals(i, map.remove(i)); + } else { + Assert.assertEquals(i, map.put(i, i)); + } + } + Assert.assertEquals(numEntries + (numEntries / 2) - 1, map.size()); + for (int i = 0; i < numEntries; i++) { + if (i % 2 == 0) { + Assert.assertFalse(map.containsKey(i)); + } else { + Assert.assertEquals(i, map.get(i)); + } + Assert.assertEquals(-i, map.get(-i)); + } + } + + @Test + public void testSerde() throws IOException, ClassNotFoundException { + Int2LongOpenHashTable map = new Int2LongOpenHashTable(16384); + final int numEntries = 1000000; + for (int i = 0; i < numEntries; i++) { + Assert.assertEquals(-1L, map.put(i, i)); + } + + byte[] b = ObjectUtils.toCompressedBytes(map); + map = new Int2LongOpenHashTable(16384); + ObjectUtils.readCompressedObject(b, map); + + Assert.assertEquals(numEntries, map.size()); + for (int i = 0; i < numEntries; i++) { + long v = map.get(i); + Assert.assertEquals(i, v); + } + } + + @Test + public void testIterator() { + Int2LongOpenHashTable map = new Int2LongOpenHashTable(1000); + Int2LongOpenHashTable.MapIterator itor = map.entries(); + Assert.assertFalse(itor.hasNext()); + + final int numEntries = 1000000; + for (int i = 0; i < numEntries; i++) { + Assert.assertEquals(-1L, map.put(i, i)); + } + Assert.assertEquals(numEntries, map.size()); + + itor = map.entries(); + Assert.assertTrue(itor.hasNext()); + while (itor.hasNext()) { + Assert.assertFalse(itor.next() == -1); + int k = itor.getKey(); + long v = itor.getValue(); + Assert.assertEquals(k, v); + } + Assert.assertEquals(-1, itor.next()); + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/core/src/test/java/hivemall/utils/collections/maps/IntOpenHashMapTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/utils/collections/maps/IntOpenHashMapTest.java b/core/src/test/java/hivemall/utils/collections/maps/IntOpenHashMapTest.java deleted file mode 100644 index 675c586..0000000 --- a/core/src/test/java/hivemall/utils/collections/maps/IntOpenHashMapTest.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections.maps; - -import hivemall.utils.collections.maps.IntOpenHashMap; - -import org.junit.Assert; -import org.junit.Test; - -public class IntOpenHashMapTest { - - @Test - public void testSize() { - IntOpenHashMap<Float> map = new IntOpenHashMap<Float>(16384); - map.put(1, Float.valueOf(3.f)); - Assert.assertEquals(Float.valueOf(3.f), map.get(1)); - map.put(1, Float.valueOf(5.f)); - Assert.assertEquals(Float.valueOf(5.f), map.get(1)); - Assert.assertEquals(1, map.size()); - } - - @Test - public void testPutAndGet() { - IntOpenHashMap<Integer> map = new IntOpenHashMap<Integer>(16384); - final int numEntries = 1000000; - for (int i = 0; i < numEntries; i++) { - Assert.assertNull(map.put(i, i)); - } - Assert.assertEquals(numEntries, map.size()); - for (int i = 0; i < numEntries; i++) { - Integer v = map.get(i); - Assert.assertEquals(i, v.intValue()); - } - } - - @Test - public void testIterator() { - IntOpenHashMap<Integer> map = new IntOpenHashMap<Integer>(1000); - IntOpenHashMap.IMapIterator<Integer> itor = map.entries(); - Assert.assertFalse(itor.hasNext()); - - final int numEntries = 1000000; - for (int i = 0; i < numEntries; i++) { - Assert.assertNull(map.put(i, i)); - } - Assert.assertEquals(numEntries, map.size()); - - itor = map.entries(); - Assert.assertTrue(itor.hasNext()); - while (itor.hasNext()) { - Assert.assertFalse(itor.next() == -1); - int k = itor.getKey(); - Integer v = itor.getValue(); - Assert.assertEquals(k, v.intValue()); - } - Assert.assertEquals(-1, itor.next()); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/core/src/test/java/hivemall/utils/collections/maps/IntOpenHashTableTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/utils/collections/maps/IntOpenHashTableTest.java b/core/src/test/java/hivemall/utils/collections/maps/IntOpenHashTableTest.java index d5887cd..46a3938 100644 --- a/core/src/test/java/hivemall/utils/collections/maps/IntOpenHashTableTest.java +++ b/core/src/test/java/hivemall/utils/collections/maps/IntOpenHashTableTest.java @@ -49,4 +49,27 @@ public class IntOpenHashTableTest { } } + @Test + public void testIterator() { + IntOpenHashTable<Integer> map = new IntOpenHashTable<Integer>(1000); + IntOpenHashTable.IMapIterator<Integer> itor = map.entries(); + Assert.assertFalse(itor.hasNext()); + + final int numEntries = 1000000; + for (int i = 0; i < numEntries; i++) { + Assert.assertNull(map.put(i, i)); + } + Assert.assertEquals(numEntries, map.size()); + + itor = map.entries(); + Assert.assertTrue(itor.hasNext()); + while (itor.hasNext()) { + Assert.assertFalse(itor.next() == -1); + int k = itor.getKey(); + Integer v = itor.getValue(); + Assert.assertEquals(k, v.intValue()); + } + Assert.assertEquals(-1, itor.next()); + } + } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/core/src/test/java/hivemall/utils/collections/maps/Long2IntOpenHashMapTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/utils/collections/maps/Long2IntOpenHashMapTest.java b/core/src/test/java/hivemall/utils/collections/maps/Long2IntOpenHashMapTest.java deleted file mode 100644 index a03af53..0000000 --- a/core/src/test/java/hivemall/utils/collections/maps/Long2IntOpenHashMapTest.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.utils.collections.maps; - -import hivemall.utils.collections.maps.Long2IntOpenHashTable; -import hivemall.utils.lang.ObjectUtils; - -import java.io.IOException; - -import org.junit.Assert; -import org.junit.Test; - -public class Long2IntOpenHashMapTest { - - @Test - public void testSize() { - Long2IntOpenHashTable map = new Long2IntOpenHashTable(16384); - map.put(1L, 3); - Assert.assertEquals(3, map.get(1L)); - map.put(1L, 5); - Assert.assertEquals(5, map.get(1L)); - Assert.assertEquals(1, map.size()); - } - - @Test - public void testDefaultReturnValue() { - Long2IntOpenHashTable map = new Long2IntOpenHashTable(16384); - Assert.assertEquals(0, map.size()); - Assert.assertEquals(-1, map.get(1L)); - int ret = Integer.MAX_VALUE; - map.defaultReturnValue(ret); - Assert.assertEquals(ret, map.get(1L)); - } - - @Test - public void testPutAndGet() { - Long2IntOpenHashTable map = new Long2IntOpenHashTable(16384); - final int numEntries = 1000000; - for (int i = 0; i < numEntries; i++) { - Assert.assertEquals(-1L, map.put(i, i)); - } - Assert.assertEquals(numEntries, map.size()); - for (int i = 0; i < numEntries; i++) { - Assert.assertEquals(i, map.get(i)); - } - - map.clear(); - int i = 0; - for (long j = 1L + Integer.MAX_VALUE; i < 10000; j += 99L, i++) { - map.put(j, i); - } - Assert.assertEquals(i, map.size()); - i = 0; - for (long j = 1L + Integer.MAX_VALUE; i < 10000; j += 99L, i++) { - Assert.assertEquals(i, map.get(j)); - } - } - - @Test - public void testSerde() throws IOException, ClassNotFoundException { - Long2IntOpenHashTable map = new Long2IntOpenHashTable(16384); - final int numEntries = 1000000; - for (int i = 0; i < numEntries; i++) { - Assert.assertEquals(-1, map.put(i, i)); - } - - byte[] b = ObjectUtils.toCompressedBytes(map); - map = new Long2IntOpenHashTable(16384); - ObjectUtils.readCompressedObject(b, map); - - Assert.assertEquals(numEntries, map.size()); - for (int i = 0; i < numEntries; i++) { - Assert.assertEquals(i, map.get(i)); - } - } - - @Test - public void testIterator() { - Long2IntOpenHashTable map = new Long2IntOpenHashTable(1000); - Long2IntOpenHashTable.IMapIterator itor = map.entries(); - Assert.assertFalse(itor.hasNext()); - - final int numEntries = 1000000; - for (int i = 0; i < numEntries; i++) { - Assert.assertEquals(-1, map.put(i, i)); - } - Assert.assertEquals(numEntries, map.size()); - - itor = map.entries(); - Assert.assertTrue(itor.hasNext()); - while (itor.hasNext()) { - Assert.assertFalse(itor.next() == -1); - long k = itor.getKey(); - int v = itor.getValue(); - Assert.assertEquals(k, v); - } - Assert.assertEquals(-1, itor.next()); - } -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/core/src/test/java/hivemall/utils/collections/maps/Long2IntOpenHashTableTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/utils/collections/maps/Long2IntOpenHashTableTest.java b/core/src/test/java/hivemall/utils/collections/maps/Long2IntOpenHashTableTest.java new file mode 100644 index 0000000..ca43383 --- /dev/null +++ b/core/src/test/java/hivemall/utils/collections/maps/Long2IntOpenHashTableTest.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.utils.collections.maps; + +import hivemall.utils.collections.maps.Long2IntOpenHashTable; +import hivemall.utils.lang.ObjectUtils; + +import java.io.IOException; + +import org.junit.Assert; +import org.junit.Test; + +public class Long2IntOpenHashTableTest { + + @Test + public void testSize() { + Long2IntOpenHashTable map = new Long2IntOpenHashTable(16384); + map.put(1L, 3); + Assert.assertEquals(3, map.get(1L)); + map.put(1L, 5); + Assert.assertEquals(5, map.get(1L)); + Assert.assertEquals(1, map.size()); + } + + @Test + public void testDefaultReturnValue() { + Long2IntOpenHashTable map = new Long2IntOpenHashTable(16384); + Assert.assertEquals(0, map.size()); + Assert.assertEquals(-1, map.get(1L)); + int ret = Integer.MAX_VALUE; + map.defaultReturnValue(ret); + Assert.assertEquals(ret, map.get(1L)); + } + + @Test + public void testPutAndGet() { + Long2IntOpenHashTable map = new Long2IntOpenHashTable(16384); + final int numEntries = 1000000; + for (int i = 0; i < numEntries; i++) { + Assert.assertEquals(-1L, map.put(i, i)); + } + Assert.assertEquals(numEntries, map.size()); + for (int i = 0; i < numEntries; i++) { + Assert.assertEquals(i, map.get(i)); + } + + map.clear(); + int i = 0; + for (long j = 1L + Integer.MAX_VALUE; i < 10000; j += 99L, i++) { + map.put(j, i); + } + Assert.assertEquals(i, map.size()); + i = 0; + for (long j = 1L + Integer.MAX_VALUE; i < 10000; j += 99L, i++) { + Assert.assertEquals(i, map.get(j)); + } + } + + @Test + public void testSerde() throws IOException, ClassNotFoundException { + Long2IntOpenHashTable map = new Long2IntOpenHashTable(16384); + final int numEntries = 1000000; + for (int i = 0; i < numEntries; i++) { + Assert.assertEquals(-1, map.put(i, i)); + } + + byte[] b = ObjectUtils.toCompressedBytes(map); + map = new Long2IntOpenHashTable(16384); + ObjectUtils.readCompressedObject(b, map); + + Assert.assertEquals(numEntries, map.size()); + for (int i = 0; i < numEntries; i++) { + Assert.assertEquals(i, map.get(i)); + } + } + + @Test + public void testIterator() { + Long2IntOpenHashTable map = new Long2IntOpenHashTable(1000); + Long2IntOpenHashTable.IMapIterator itor = map.entries(); + Assert.assertFalse(itor.hasNext()); + + final int numEntries = 1000000; + for (int i = 0; i < numEntries; i++) { + Assert.assertEquals(-1, map.put(i, i)); + } + Assert.assertEquals(numEntries, map.size()); + + itor = map.entries(); + Assert.assertTrue(itor.hasNext()); + while (itor.hasNext()) { + Assert.assertFalse(itor.next() == -1); + long k = itor.getKey(); + int v = itor.getValue(); + Assert.assertEquals(k, v); + } + Assert.assertEquals(-1, itor.next()); + } +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/docs/gitbook/getting_started/input-format.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/getting_started/input-format.md b/docs/gitbook/getting_started/input-format.md index 7bd8573..a01b5e3 100644 --- a/docs/gitbook/getting_started/input-format.md +++ b/docs/gitbook/getting_started/input-format.md @@ -190,25 +190,48 @@ from ## Quantitative Features -`array<string> quantitative_features(array<string> featureNames, ...)` is a helper function to create sparse quantitative features from a table. +`array<string> quantitative_features(array<string> featureNames, feature1, feature2, .. [, const string options])` is a helper function to create sparse quantitative features from a table. ```sql -select quantitative_features(array("apple","value"),1,120.3); +select quantitative_features( + array("apple","height","weight"), + 1,180.3,70.2 + -- ,"-emit_null" +); +``` +> ["apple:1.0","height:180.3","weight:70.2"] + +```sql +select quantitative_features( + array("apple","height","weight"), + 1,cast(null as double),70.2 + ,"-emit_null" +); ``` -> ["apple:1.0","value:120.3"] +> ["apple:1.0",null,"weight:70.2"] ## Categorical Features -`array<string> categorical_features(array<string> featureNames, ...)` is a helper function to create sparse categorical features from a table. +`array<string> categorical_features(array<string> featureNames, feature1, feature2, .. [, const string options])` is a helper function to create sparse categorical features from a table. ```sql select categorical_features( array("is_cat","is_dog","is_lion","is_pengin","species"), 1, 0, 1.0, true, "dog" + -- ,"-emit_null" ); ``` > ["is_cat#1","is_dog#0","is_lion#1.0","is_pengin#true","species#dog"] +```sql +select categorical_features( + array("is_cat","is_dog","is_lion","is_pengin","species"), + 1, 0, 1.0, true, null + ,"-emit_null" +); +``` +> ["is_cat#1","is_dog#0","is_lion#1.0","is_pengin#true",null] + ## Preparing training data table You can create a training data table as follows: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 8b6aa5e..3d7040c 100644 --- a/pom.xml +++ b/pom.xml @@ -288,6 +288,24 @@ <spark.binary.version>2.0</spark.binary.version> </properties> </profile> + <profile> + <id>java7</id> + <properties> + <spark.test.jvm.opts>-ea -Xms768m -Xmx1024m -XX:PermSize=128m -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m</spark.test.jvm.opts> + </properties> + <activation> + <jdk>[,1.8)</jdk> <!-- version < 1.8 --> + </activation> + </profile> + <profile> + <id>java8</id> + <properties> + <spark.test.jvm.opts>-ea -Xms768m -Xmx1024m -XX:MetaspaceSize=128m -XX:MaxMetaspaceSize=512m -XX:ReservedCodeCacheSize=512m</spark.test.jvm.opts> + </properties> + <activation> + <jdk>[1.8,)</jdk> <!-- version >= 1.8 --> + </activation> + </profile> <profile> <id>compile-xgboost</id> <build> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/resources/ddl/define-all-as-permanent.hive ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all-as-permanent.hive b/resources/ddl/define-all-as-permanent.hive index feb1a08..c2b38fb 100644 --- a/resources/ddl/define-all-as-permanent.hive +++ b/resources/ddl/define-all-as-permanent.hive @@ -313,6 +313,9 @@ CREATE FUNCTION binarize_label as 'hivemall.ftvec.trans.BinarizeLabelUDTF' USING DROP FUNCTION IF EXISTS onehot_encoding; CREATE FUNCTION onehot_encoding as 'hivemall.ftvec.trans.OnehotEncodingUDAF' USING JAR '${hivemall_jar}'; +DROP FUNCTION IF EXISTS add_field_indicies; +CREATE FUNCTION add_field_indicies as 'hivemall.ftvec.trans.AddFieldIndicesUDF' USING JAR '${hivemall_jar}'; + ------------------------------ -- ranking helper functions -- ------------------------------ @@ -620,7 +623,7 @@ DROP FUNCTION IF EXISTS train_ffm; CREATE FUNCTION train_ffm as 'hivemall.fm.FieldAwareFactorizationMachineUDTF' USING JAR '${hivemall_jar}'; DROP FUNCTION IF EXISTS ffm_predict; -CREATE FUNCTION ffm_predict as 'hivemall.fm.FFMPredictUDF' USING JAR '${hivemall_jar}'; +CREATE FUNCTION ffm_predict as 'hivemall.fm.FFMPredictGenericUDAF' USING JAR '${hivemall_jar}'; --------------------------- -- Anomaly Detection ------ http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/resources/ddl/define-all.hive ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all.hive b/resources/ddl/define-all.hive index 310f9f4..89821f8 100644 --- a/resources/ddl/define-all.hive +++ b/resources/ddl/define-all.hive @@ -309,6 +309,9 @@ create temporary function binarize_label as 'hivemall.ftvec.trans.BinarizeLabelU drop temporary function if exists onehot_encoding; create temporary function onehot_encoding as 'hivemall.ftvec.trans.OnehotEncodingUDAF'; +drop temporary function if exists add_field_indicies; +create temporary function add_field_indicies as 'hivemall.ftvec.trans.AddFieldIndicesUDF'; + ------------------------------ -- ranking helper functions -- ------------------------------ @@ -612,7 +615,7 @@ drop temporary function if exists train_ffm; create temporary function train_ffm as 'hivemall.fm.FieldAwareFactorizationMachineUDTF'; drop temporary function if exists ffm_predict; -create temporary function ffm_predict as 'hivemall.fm.FFMPredictUDF'; +create temporary function ffm_predict as 'hivemall.fm.FFMPredictGenericUDAF'; --------------------------- -- Anomaly Detection ------ http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/resources/ddl/define-all.spark ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all.spark b/resources/ddl/define-all.spark index 42b235b..b4926e3 100644 --- a/resources/ddl/define-all.spark +++ b/resources/ddl/define-all.spark @@ -312,6 +312,9 @@ sqlContext.sql("CREATE TEMPORARY FUNCTION binarize_label AS 'hivemall.ftvec.tran sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS onehot_encoding") sqlContext.sql("CREATE TEMPORARY FUNCTION onehot_encoding AS 'hivemall.ftvec.trans.OnehotEncodingUDAF'") +sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS add_field_indicies") +sqlContext.sql("CREATE TEMPORARY FUNCTION add_field_indicies AS 'hivemall.ftvec.trans.AddFieldIndicesUDF'") + /** * ranking helper functions */ @@ -596,7 +599,7 @@ sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS train_ffm") sqlContext.sql("CREATE TEMPORARY FUNCTION train_ffm AS 'hivemall.fm.FieldAwareFactorizationMachineUDTF'") sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS ffm_predict") -sqlContext.sql("CREATE TEMPORARY FUNCTION ffm_predict AS 'hivemall.fm.FFMPredictUDF'") +sqlContext.sql("CREATE TEMPORARY FUNCTION ffm_predict AS 'hivemall.fm.FFMPredictGenericUDAF'") /** * Anomaly Detection http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/resources/ddl/define-udfs.td.hql ---------------------------------------------------------------------- diff --git a/resources/ddl/define-udfs.td.hql b/resources/ddl/define-udfs.td.hql index dd694e3..c7fdd49 100644 --- a/resources/ddl/define-udfs.td.hql +++ b/resources/ddl/define-udfs.td.hql @@ -174,6 +174,9 @@ create temporary function dimsum_mapper as 'hivemall.knn.similarity.DIMSUMMapper create temporary function train_classifier as 'hivemall.classifier.GeneralClassifierUDTF'; create temporary function train_regressor as 'hivemall.regression.GeneralRegressorUDTF'; create temporary function tree_export as 'hivemall.smile.tools.TreeExportUDF'; +create temporary function train_ffm as 'hivemall.fm.FieldAwareFactorizationMachineUDTF'; +create temporary function ffm_predict as 'hivemall.fm.FFMPredictGenericUDAF'; +create temporary function add_field_indicies as 'hivemall.ftvec.trans.AddFieldIndicesUDF'; -- NLP features create temporary function tokenize_ja as 'hivemall.nlp.tokenizer.KuromojiUDF'; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/spark/spark-2.0/pom.xml ---------------------------------------------------------------------- diff --git a/spark/spark-2.0/pom.xml b/spark/spark-2.0/pom.xml index 123c424..74e9348 100644 --- a/spark/spark-2.0/pom.xml +++ b/spark/spark-2.0/pom.xml @@ -32,9 +32,6 @@ <packaging>jar</packaging> <properties> - <PermGen>64m</PermGen> - <MaxPermGen>512m</MaxPermGen> - <CodeCacheSize>512m</CodeCacheSize> <main.basedir>${project.parent.basedir}</main.basedir> </properties> @@ -164,11 +161,8 @@ <!-- <arg>-feature</arg> --> </args> <jvmArgs> - <jvmArg>-Xms1024m</jvmArg> + <jvmArg>-Xms512m</jvmArg> <jvmArg>-Xmx1024m</jvmArg> - <jvmArg>-XX:PermSize=${PermGen}</jvmArg> - <jvmArg>-XX:MaxPermSize=${MaxPermGen}</jvmArg> - <jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg> </jvmArgs> </configuration> </plugin> @@ -233,7 +227,7 @@ <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory> <junitxml>.</junitxml> <filereports>SparkTestSuite.txt</filereports> - <argLine>-ea -Xmx2g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine> + <argLine>${spark.test.jvm.opts}</argLine> <stderr /> <environmentVariables> <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/spark/spark-2.1/pom.xml ---------------------------------------------------------------------- diff --git a/spark/spark-2.1/pom.xml b/spark/spark-2.1/pom.xml index 22d3e12..d7ab81a 100644 --- a/spark/spark-2.1/pom.xml +++ b/spark/spark-2.1/pom.xml @@ -32,9 +32,6 @@ <packaging>jar</packaging> <properties> - <PermGen>64m</PermGen> - <MaxPermGen>512m</MaxPermGen> - <CodeCacheSize>512m</CodeCacheSize> <main.basedir>${project.parent.basedir}</main.basedir> </properties> @@ -164,11 +161,8 @@ <!-- <arg>-feature</arg> --> </args> <jvmArgs> - <jvmArg>-Xms1024m</jvmArg> + <jvmArg>-Xms512m</jvmArg> <jvmArg>-Xmx1024m</jvmArg> - <jvmArg>-XX:PermSize=${PermGen}</jvmArg> - <jvmArg>-XX:MaxPermSize=${MaxPermGen}</jvmArg> - <jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg> </jvmArgs> </configuration> </plugin> @@ -233,7 +227,7 @@ <reportsDirectory>${project.build.directory}/surefire-reports</reportsDirectory> <junitxml>.</junitxml> <filereports>SparkTestSuite.txt</filereports> - <argLine>-ea -Xmx2g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize}</argLine> + <argLine>${spark.test.jvm.opts}</argLine> <stderr /> <environmentVariables> <SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/38047891/spark/spark-common/pom.xml ---------------------------------------------------------------------- diff --git a/spark/spark-common/pom.xml b/spark/spark-common/pom.xml index e8e8ff4..3153a75 100644 --- a/spark/spark-common/pom.xml +++ b/spark/spark-common/pom.xml @@ -32,9 +32,6 @@ <packaging>jar</packaging> <properties> - <PermGen>64m</PermGen> - <MaxPermGen>1024m</MaxPermGen> - <CodeCacheSize>512m</CodeCacheSize> <main.basedir>${project.parent.basedir}</main.basedir> </properties> @@ -138,11 +135,8 @@ <!-- <arg>-feature</arg> --> </args> <jvmArgs> - <jvmArg>-Xms1024m</jvmArg> + <jvmArg>-Xms512m</jvmArg> <jvmArg>-Xmx1024m</jvmArg> - <jvmArg>-XX:PermSize=${PermGen}</jvmArg> - <jvmArg>-XX:MaxPermSize=${MaxPermGen}</jvmArg> - <jvmArg>-XX:ReservedCodeCacheSize=${CodeCacheSize}</jvmArg> </jvmArgs> </configuration> </plugin>
