Repository: incubator-hivemall Updated Branches: refs/heads/master d4f4ab9ba -> fdf702143
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/core/src/main/java/hivemall/math/vector/DenseFloatVector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/math/vector/DenseFloatVector.java b/core/src/main/java/hivemall/math/vector/DenseFloatVector.java new file mode 100644 index 0000000..2bb8144 --- /dev/null +++ b/core/src/main/java/hivemall/math/vector/DenseFloatVector.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.math.vector; + +import java.util.Arrays; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; + +public final class DenseFloatVector extends AbstractVector { + + @Nonnull + private final float[] values; + private final int size; + + public DenseFloatVector(@Nonnegative int size) { + super(); + this.values = new float[size]; + this.size = size; + } + + public DenseFloatVector(@Nonnull float[] values) { + super(); + this.values = values; + this.size = values.length; + } + + @Override + public float get(@Nonnegative final int index, final float defaultValue) { + checkIndex(index); + if (index >= size) { + return defaultValue; + } + + return values[index]; + } + + @Override + public double get(@Nonnegative final int index, final double defaultValue) { + checkIndex(index); + if (index >= size) { + return defaultValue; + } + + return values[index]; + } + + @Override + public void set(@Nonnegative final int index, final float value) { + checkIndex(index, size); + + values[index] = value; + } + + @Override + public void set(@Nonnegative final int index, final double value) { + checkIndex(index, size); + + values[index] = (float) value; + } + + @Override + public void incr(@Nonnegative final int index, final double delta) { + checkIndex(index, size); + + values[index] += delta; + } + + @Override + public void each(@Nonnull final VectorProcedure procedure) { + for (int i = 0; i < values.length; i++) { + procedure.apply(i, values[i]); + } + } + + @Override + public int size() { + return size; + } + + @Override + public void clear() { + Arrays.fill(values, 0.f); + } + + @Override + public double[] toArray() { + throw new UnsupportedOperationException(); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/core/src/main/java/hivemall/math/vector/SparseFloatVector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/math/vector/SparseFloatVector.java b/core/src/main/java/hivemall/math/vector/SparseFloatVector.java new file mode 100644 index 0000000..91391b5 --- /dev/null +++ b/core/src/main/java/hivemall/math/vector/SparseFloatVector.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.math.vector; + +import hivemall.utils.collections.arrays.SparseFloatArray; + +import javax.annotation.Nonnegative; +import javax.annotation.Nonnull; + +public final class SparseFloatVector extends AbstractVector { + + @Nonnull + private final SparseFloatArray values; + + public SparseFloatVector() { + super(); + this.values = new SparseFloatArray(); + } + + public SparseFloatVector(@Nonnull SparseFloatArray values) { + super(); + this.values = values; + } + + @Override + public float get(@Nonnegative final int index, final float defaultValue) { + return values.get(index, defaultValue); + } + + @Override + public double get(@Nonnegative final int index, final double defaultValue) { + return values.get(index, (float) defaultValue); + } + + @Override + public void set(@Nonnegative final int index, final float value) { + values.put(index, value); + } + + @Override + public void set(@Nonnegative final int index, final double value) { + values.put(index, (float) value); + } + + @Override + public void incr(@Nonnegative final int index, final double delta) { + values.increment(index, (float) delta); + } + + @Override + public void each(@Nonnull final VectorProcedure procedure) { + values.each(procedure); + } + + @Override + public int size() { + return values.size(); + } + + @Override + public void clear() { + values.clear(); + } + + @Override + public double[] toArray() { + throw new UnsupportedOperationException(); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/core/src/main/java/hivemall/math/vector/Vector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/math/vector/Vector.java b/core/src/main/java/hivemall/math/vector/Vector.java index 2e5107d..d1d3ebc 100644 --- a/core/src/main/java/hivemall/math/vector/Vector.java +++ b/core/src/main/java/hivemall/math/vector/Vector.java @@ -25,6 +25,8 @@ public interface Vector { public double get(@Nonnegative int index); + public float get(@Nonnegative int index, float defaultValue); + public double get(@Nonnegative int index, double defaultValue); /** @@ -32,6 +34,11 @@ public interface Vector { */ public void set(@Nonnegative int index, double value); + /** + * @throws UnsupportedOperationException + */ + public void set(@Nonnegative int index, float value); + public void incr(@Nonnegative int index, double delta); public void each(@Nonnull VectorProcedure procedure); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/core/src/main/java/hivemall/math/vector/VectorProcedure.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/math/vector/VectorProcedure.java b/core/src/main/java/hivemall/math/vector/VectorProcedure.java index 3f3c390..4978885 100644 --- a/core/src/main/java/hivemall/math/vector/VectorProcedure.java +++ b/core/src/main/java/hivemall/math/vector/VectorProcedure.java @@ -30,6 +30,10 @@ public abstract class VectorProcedure { public void apply(@Nonnegative int i, @Nonnegative int j, double value) {} + public void apply(@Nonnegative int i, float value) { + apply(i, (double) value); + } + public void apply(@Nonnegative int i, double value) {} public void apply(@Nonnegative int i, int value) {} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/core/src/main/java/hivemall/recommend/SlimUDTF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/recommend/SlimUDTF.java b/core/src/main/java/hivemall/recommend/SlimUDTF.java index e205c18..c6363f1 100644 --- a/core/src/main/java/hivemall/recommend/SlimUDTF.java +++ b/core/src/main/java/hivemall/recommend/SlimUDTF.java @@ -21,7 +21,8 @@ package hivemall.recommend; import hivemall.UDTFWithOptions; import hivemall.annotations.VisibleForTesting; import hivemall.common.ConversionState; -import hivemall.math.matrix.sparse.DoKFloatMatrix; +import hivemall.math.matrix.FloatMatrix; +import hivemall.math.matrix.sparse.floats.DoKFloatMatrix; import hivemall.math.vector.VectorProcedure; import hivemall.utils.collections.maps.Int2FloatOpenHashTable; import hivemall.utils.collections.maps.IntOpenHashTable; @@ -133,7 +134,7 @@ public class SlimUDTF extends UDTFWithOptions { /** item-user matrix holding the input data */ @Nullable - private transient DoKFloatMatrix _dataMatrix; + private transient FloatMatrix _dataMatrix; // used to store KNN data into temporary file for iterative training private transient NioStatefullSegment _fileIO; @@ -362,7 +363,7 @@ public class SlimUDTF extends UDTFWithOptions { private void train(final int itemI, @Nonnull final Int2FloatOpenHashTable ri, @Nonnull final IntOpenHashTable<Int2FloatOpenHashTable> kNNi, final int itemJ, @Nonnull final Int2FloatOpenHashTable rj) { - final DoKFloatMatrix W = _weightMatrix; + final FloatMatrix W = _weightMatrix; final int N = rj.size(); if (N == 0) { @@ -397,8 +398,8 @@ public class SlimUDTF extends UDTFWithOptions { private void train(final int itemI, @Nonnull final IntOpenHashTable<Int2FloatOpenHashTable> knnItems, final int itemJ) { - final DoKFloatMatrix A = _dataMatrix; - final DoKFloatMatrix W = _weightMatrix; + final FloatMatrix A = _dataMatrix; + final FloatMatrix W = _weightMatrix; final int N = A.numColumns(itemJ); if (N == 0) { @@ -433,7 +434,7 @@ public class SlimUDTF extends UDTFWithOptions { private static double predict(final int user, final int itemI, @Nonnull final IntOpenHashTable<Int2FloatOpenHashTable> knnItems, - final int excludeIndex, @Nonnull final DoKFloatMatrix weightMatrix) { + final int excludeIndex, @Nonnull final FloatMatrix weightMatrix) { final Int2FloatOpenHashTable kNNu = knnItems.get(user); if (kNNu == null) { return 0.d; @@ -725,8 +726,7 @@ public class SlimUDTF extends UDTFWithOptions { @Nonnull private static Int2FloatOpenHashTable int2floatMap(final int item, @Nonnull final Map<?, ?> map, @Nonnull final PrimitiveObjectInspector keyOI, - @Nonnull final PrimitiveObjectInspector valueOI, - @Nullable final DoKFloatMatrix dataMatrix) { + @Nonnull final PrimitiveObjectInspector valueOI, @Nullable final FloatMatrix dataMatrix) { return int2floatMap(item, map, keyOI, valueOI, dataMatrix, null); } @@ -734,7 +734,7 @@ public class SlimUDTF extends UDTFWithOptions { private static Int2FloatOpenHashTable int2floatMap(final int item, @Nonnull final Map<?, ?> map, @Nonnull final PrimitiveObjectInspector keyOI, @Nonnull final PrimitiveObjectInspector valueOI, - @Nullable final DoKFloatMatrix dataMatrix, @Nullable Int2FloatOpenHashTable dst) { + @Nullable final FloatMatrix dataMatrix, @Nullable Int2FloatOpenHashTable dst) { if (dst == null) { dst = new Int2FloatOpenHashTable(map.size()); dst.defaultReturnValue(0.f); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/core/src/main/java/hivemall/smile/utils/SmileExtUtils.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/smile/utils/SmileExtUtils.java b/core/src/main/java/hivemall/smile/utils/SmileExtUtils.java index 5e27e12..a665764 100644 --- a/core/src/main/java/hivemall/smile/utils/SmileExtUtils.java +++ b/core/src/main/java/hivemall/smile/utils/SmileExtUtils.java @@ -305,14 +305,14 @@ public final class SmileExtUtils { } return x; } else { - final int[] indicies = MathUtils.permutation(numRows); + final int[] indices = MathUtils.permutation(numRows); for (int i = numRows; i > 1; i--) { int j = rnd.nextInt(i); int k = i - 1; - swap(indicies, k, j); + swap(indices, k, j); swap(y, k, j); } - return MatrixUtils.shuffle(x, indicies); + return MatrixUtils.shuffle(x, indices); } } @@ -338,14 +338,14 @@ public final class SmileExtUtils { } return x; } else { - final int[] indicies = MathUtils.permutation(numRows); + final int[] indices = MathUtils.permutation(numRows); for (int i = numRows; i > 1; i--) { int j = rnd.nextInt(i); int k = i - 1; - swap(indicies, k, j); + swap(indices, k, j); swap(y, k, j); } - return MatrixUtils.shuffle(x, indicies); + return MatrixUtils.shuffle(x, indices); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/core/src/main/java/hivemall/utils/collections/arrays/SparseFloatArray.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/collections/arrays/SparseFloatArray.java b/core/src/main/java/hivemall/utils/collections/arrays/SparseFloatArray.java index 928de77..4b38c7d 100644 --- a/core/src/main/java/hivemall/utils/collections/arrays/SparseFloatArray.java +++ b/core/src/main/java/hivemall/utils/collections/arrays/SparseFloatArray.java @@ -18,6 +18,7 @@ */ package hivemall.utils.collections.arrays; +import hivemall.math.vector.VectorProcedure; import hivemall.utils.lang.ArrayUtils; import hivemall.utils.lang.Preconditions; @@ -184,6 +185,14 @@ public final class SparseFloatArray implements FloatArray { return array; } + public void each(@Nonnull final VectorProcedure procedure) { + for (int i = 0; i < mSize; i++) { + int k = mKeys[i]; + float v = mValues[i]; + procedure.apply(k, v); + } + } + @Override public String toString() { if (size() <= 0) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/core/src/test/java/hivemall/math/matrix/MatrixUtilsTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/math/matrix/MatrixUtilsTest.java b/core/src/test/java/hivemall/math/matrix/MatrixUtilsTest.java new file mode 100644 index 0000000..409bc65 --- /dev/null +++ b/core/src/test/java/hivemall/math/matrix/MatrixUtilsTest.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.math.matrix; + +import hivemall.math.matrix.sparse.CSCMatrix; +import hivemall.math.matrix.sparse.CSRMatrix; +import hivemall.utils.lang.ArrayUtils; + +import java.util.Random; + +import org.junit.Assert; +import org.junit.Test; + +public class MatrixUtilsTest { + + @Test + public void testCoo2csr() { + // 10 0 0 0 -2 0 + // 3 9 0 0 0 3 + // 0 7 8 7 0 0 + // 3 0 8 7 5 0 + // 0 8 0 9 9 13 + // 0 4 0 0 2 -1 + double[] row1 = new double[] {10, 0, 0, 0, -2, 0}; + double[] row2 = new double[] {3, 9, 0, 0, 0, 3}; + double[] row3 = new double[] {0, 7, 8, 7, 0, 0}; + double[] row4 = new double[] {3, 0, 8, 7, 5, 0}; + double[] row5 = new double[] {0, 8, 0, 9, 9, 13}; + double[] row6 = new double[] {0, 4, 0, 0, 2, -1}; + double[][] matrix = new double[][] {row1, row2, row3, row4, row5, row6}; + + int[] rows = new int[] {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5}; + int[] cols = new int[] {0, 4, 0, 1, 5, 1, 2, 3, 0, 2, 3, 4, 1, 3, 4, 5, 1, 4, 5}; + double[] data = new double[] {10, -2, 3, 9, 3, 7, 8, 7, 3, 8, 7, 5, 8, 9, 9, 13, 4, 2, -1}; + + CSRMatrix matrix1 = MatrixUtils.coo2csr(rows, cols, data, 6, 6, false); + Assert.assertEquals(data.length, matrix1.nnz()); + + final Random rnd = new Random(43L); + for (int i = data.length; i > 1; i--) { + int to = rnd.nextInt(i); + int from = i - 1; + ArrayUtils.swap(rows, from, to); + ArrayUtils.swap(cols, from, to); + ArrayUtils.swap(data, from, to); + } + + CSRMatrix matrix2 = MatrixUtils.coo2csr(rows, cols, data, 6, 6, true); + Assert.assertEquals(data.length, matrix2.nnz()); + + double[] dst1 = matrix1.row(); + Assert.assertEquals(6, matrix1.numRows()); + for (int row = 0; row < matrix1.numRows(); row++) { + matrix1.getRow(row, dst1); + Assert.assertArrayEquals(matrix[row], dst1, 1E-7d); + } + + double[] dst2 = matrix2.row(); + Assert.assertEquals(6, matrix2.numRows()); + for (int row = 0; row < matrix2.numRows(); row++) { + matrix2.getRow(row, dst2); + Assert.assertArrayEquals(matrix[row], dst2, 1E-7d); + } + } + + @Test + public void testCoo2csc() { + // 10 0 0 0 -2 0 + // 3 9 0 0 0 3 + // 0 7 8 7 0 0 + // 3 0 8 7 5 0 + // 0 8 0 9 9 13 + // 0 4 0 0 2 -1 + double[] row1 = new double[] {10, 0, 0, 0, -2, 0}; + double[] row2 = new double[] {3, 9, 0, 0, 0, 3}; + double[] row3 = new double[] {0, 7, 8, 7, 0, 0}; + double[] row4 = new double[] {3, 0, 8, 7, 5, 0}; + double[] row5 = new double[] {0, 8, 0, 9, 9, 13}; + double[] row6 = new double[] {0, 4, 0, 0, 2, -1}; + double[][] matrix = new double[][] {row1, row2, row3, row4, row5, row6}; + + int[] rows = new int[] {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5}; + int[] cols = new int[] {0, 4, 0, 1, 5, 1, 2, 3, 0, 2, 3, 4, 1, 3, 4, 5, 1, 4, 5}; + double[] data = new double[] {10, -2, 3, 9, 3, 7, 8, 7, 3, 8, 7, 5, 8, 9, 9, 13, 4, 2, -1}; + + CSCMatrix matrix1 = MatrixUtils.coo2csc(rows, cols, data, 6, 6, false); + Assert.assertEquals(data.length, matrix1.nnz()); + + final Random rnd = new Random(43L); + for (int i = data.length; i > 1; i--) { + int to = rnd.nextInt(i); + int from = i - 1; + ArrayUtils.swap(rows, from, to); + ArrayUtils.swap(cols, from, to); + ArrayUtils.swap(data, from, to); + } + + CSCMatrix matrix2 = MatrixUtils.coo2csc(rows, cols, data, 6, 6, true); + Assert.assertEquals(data.length, matrix2.nnz()); + + double[] dst1 = matrix1.row(); + Assert.assertEquals(6, matrix1.numRows()); + for (int row = 0; row < matrix1.numRows(); row++) { + matrix1.getRow(row, dst1); + Assert.assertArrayEquals(matrix[row], dst1, 1E-7d); + } + + double[] dst2 = matrix2.row(); + Assert.assertEquals(6, matrix2.numRows()); + for (int row = 0; row < matrix2.numRows(); row++) { + matrix2.getRow(row, dst2); + Assert.assertArrayEquals(matrix[row], dst2, 1E-7d); + } + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/core/src/test/java/hivemall/math/matrix/sparse/DoKFloatMatrixTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/math/matrix/sparse/DoKFloatMatrixTest.java b/core/src/test/java/hivemall/math/matrix/sparse/DoKFloatMatrixTest.java deleted file mode 100644 index c9e6afd..0000000 --- a/core/src/test/java/hivemall/math/matrix/sparse/DoKFloatMatrixTest.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.math.matrix.sparse; - -import java.util.Random; - -import org.junit.Assert; -import org.junit.Test; - -public class DoKFloatMatrixTest { - - @Test - public void testGetSet() { - DoKFloatMatrix matrix = new DoKFloatMatrix(); - Random rnd = new Random(43); - - for (int i = 0; i < 1000; i++) { - int row = Math.abs(rnd.nextInt()); - int col = Math.abs(rnd.nextInt()); - double v = rnd.nextDouble(); - matrix.set(row, col, v); - Assert.assertEquals(v, matrix.get(row, col), 0.00001d); - } - - } - -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/core/src/test/java/hivemall/math/matrix/sparse/DoKMatrixTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/math/matrix/sparse/DoKMatrixTest.java b/core/src/test/java/hivemall/math/matrix/sparse/DoKMatrixTest.java new file mode 100644 index 0000000..be6b424 --- /dev/null +++ b/core/src/test/java/hivemall/math/matrix/sparse/DoKMatrixTest.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.math.matrix.sparse; + +import java.util.Random; + +import org.junit.Assert; +import org.junit.Test; + +public class DoKMatrixTest { + + @Test + public void testGetSet() { + DoKMatrix matrix = new DoKMatrix(); + Random rnd = new Random(43); + + for (int i = 0; i < 1000; i++) { + int row = Math.abs(rnd.nextInt()); + int col = Math.abs(rnd.nextInt()); + double v = rnd.nextDouble(); + matrix.set(row, col, v); + Assert.assertEquals(v, matrix.get(row, col), 0.00001d); + } + + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/core/src/test/java/hivemall/math/matrix/sparse/floats/DoKFloatMatrixTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/hivemall/math/matrix/sparse/floats/DoKFloatMatrixTest.java b/core/src/test/java/hivemall/math/matrix/sparse/floats/DoKFloatMatrixTest.java new file mode 100644 index 0000000..697d348 --- /dev/null +++ b/core/src/test/java/hivemall/math/matrix/sparse/floats/DoKFloatMatrixTest.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package hivemall.math.matrix.sparse.floats; + +import hivemall.math.matrix.sparse.floats.DoKFloatMatrix; + +import java.util.Random; + +import org.junit.Assert; +import org.junit.Test; + +public class DoKFloatMatrixTest { + + @Test + public void testGetSet() { + DoKFloatMatrix matrix = new DoKFloatMatrix(); + Random rnd = new Random(43); + + for (int i = 0; i < 1000; i++) { + int row = Math.abs(rnd.nextInt()); + int col = Math.abs(rnd.nextInt()); + float v = rnd.nextFloat(); + matrix.set(row, col, v); + Assert.assertEquals(v, matrix.get(row, col), 0.00001f); + } + } + + @Test + public void testToRowMajorMatrix() { + DoKFloatMatrix matrix = new DoKFloatMatrix(); + Random rnd = new Random(43); + + for (int i = 0; i < 10; i++) { + int row = Math.abs(rnd.nextInt(100)); + int col = Math.abs(rnd.nextInt(100)); + float v = rnd.nextFloat(); + matrix.set(row, col, v); + } + + CSRFloatMatrix csr = matrix.toRowMajorMatrix(); + Assert.assertEquals(10, csr.nnz()); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/docs/gitbook/getting_started/installation.md ---------------------------------------------------------------------- diff --git a/docs/gitbook/getting_started/installation.md b/docs/gitbook/getting_started/installation.md index ee07afb..26db29c 100644 --- a/docs/gitbook/getting_started/installation.md +++ b/docs/gitbook/getting_started/installation.md @@ -22,7 +22,7 @@ Prerequisites * Hadoop v2.4.0 or later * Hive v0.13 or later -* Java 7 or later +* Java 8 or later (Java 7 in v0.4.2 or before) * [hivemall-core-xxx-with-dependencies.jar](https://github.com/myui/hivemall/releases) * [define-all.hive](https://github.com/myui/hivemall/releases) http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 8a543e6..f26b996 100644 --- a/pom.xml +++ b/pom.xml @@ -241,8 +241,8 @@ </modules> <properties> - <java.source.version>1.7</java.source.version> - <java.target.version>1.7</java.target.version> + <java.source.version>1.8</java.source.version> + <java.target.version>1.8</java.target.version> <scala.version>2.11.8</scala.version> <scala.binary.version>2.11</scala.binary.version> <maven.build.timestamp.format>yyyy</maven.build.timestamp.format> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/resources/ddl/define-all-as-permanent.hive ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all-as-permanent.hive b/resources/ddl/define-all-as-permanent.hive index 7906375..b0107af 100644 --- a/resources/ddl/define-all-as-permanent.hive +++ b/resources/ddl/define-all-as-permanent.hive @@ -313,6 +313,10 @@ CREATE FUNCTION binarize_label as 'hivemall.ftvec.trans.BinarizeLabelUDTF' USING DROP FUNCTION IF EXISTS onehot_encoding; CREATE FUNCTION onehot_encoding as 'hivemall.ftvec.trans.OnehotEncodingUDAF' USING JAR '${hivemall_jar}'; +DROP FUNCTION IF EXISTS add_field_indices; +CREATE FUNCTION add_field_indices as 'hivemall.ftvec.trans.AddFieldIndicesUDF' USING JAR '${hivemall_jar}'; + +-- alias for backward compatibility DROP FUNCTION IF EXISTS add_field_indicies; CREATE FUNCTION add_field_indicies as 'hivemall.ftvec.trans.AddFieldIndicesUDF' USING JAR '${hivemall_jar}'; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/resources/ddl/define-all.hive ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all.hive b/resources/ddl/define-all.hive index 1b1a035..4f91f79 100644 --- a/resources/ddl/define-all.hive +++ b/resources/ddl/define-all.hive @@ -309,6 +309,10 @@ create temporary function binarize_label as 'hivemall.ftvec.trans.BinarizeLabelU drop temporary function if exists onehot_encoding; create temporary function onehot_encoding as 'hivemall.ftvec.trans.OnehotEncodingUDAF'; +drop temporary function if exists add_field_indices; +create temporary function add_field_indices as 'hivemall.ftvec.trans.AddFieldIndicesUDF'; + +-- alias for backward compatibility drop temporary function if exists add_field_indicies; create temporary function add_field_indicies as 'hivemall.ftvec.trans.AddFieldIndicesUDF'; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/resources/ddl/define-all.spark ---------------------------------------------------------------------- diff --git a/resources/ddl/define-all.spark b/resources/ddl/define-all.spark index 7e6cacd..02f92ec 100644 --- a/resources/ddl/define-all.spark +++ b/resources/ddl/define-all.spark @@ -312,6 +312,10 @@ sqlContext.sql("CREATE TEMPORARY FUNCTION binarize_label AS 'hivemall.ftvec.tran sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS onehot_encoding") sqlContext.sql("CREATE TEMPORARY FUNCTION onehot_encoding AS 'hivemall.ftvec.trans.OnehotEncodingUDAF'") +sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS add_field_indices") +sqlContext.sql("CREATE TEMPORARY FUNCTION add_field_indices AS 'hivemall.ftvec.trans.AddFieldIndicesUDF'") + +// alias for backward compatibility sqlContext.sql("DROP TEMPORARY FUNCTION IF EXISTS add_field_indicies") sqlContext.sql("CREATE TEMPORARY FUNCTION add_field_indicies AS 'hivemall.ftvec.trans.AddFieldIndicesUDF'") http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/resources/ddl/define-udfs.td.hql ---------------------------------------------------------------------- diff --git a/resources/ddl/define-udfs.td.hql b/resources/ddl/define-udfs.td.hql index 4b67fea..28b77cb 100644 --- a/resources/ddl/define-udfs.td.hql +++ b/resources/ddl/define-udfs.td.hql @@ -177,7 +177,7 @@ create temporary function train_regressor as 'hivemall.regression.GeneralRegress create temporary function tree_export as 'hivemall.smile.tools.TreeExportUDF'; create temporary function train_ffm as 'hivemall.fm.FieldAwareFactorizationMachineUDTF'; create temporary function ffm_predict as 'hivemall.fm.FFMPredictGenericUDAF'; -create temporary function add_field_indicies as 'hivemall.ftvec.trans.AddFieldIndicesUDF'; +create temporary function add_field_indices as 'hivemall.ftvec.trans.AddFieldIndicesUDF'; create temporary function to_ordered_list as 'hivemall.tools.list.UDAFToOrderedList'; create temporary function singularize as 'hivemall.tools.text.SingularizeUDF'; create temporary function train_slim as 'hivemall.recommend.SlimUDTF'; @@ -194,3 +194,4 @@ create temporary function pa2a_regress as 'hivemall.regression.PassiveAggressive create temporary function arow_regress as 'hivemall.regression.AROWRegressionUDTF'; create temporary function addBias as 'hivemall.ftvec.AddBiasUDF'; create temporary function tree_predict_v1 as 'hivemall.smile.tools.TreePredictUDFv1'; +create temporary function add_field_indicies as 'hivemall.ftvec.trans.AddFieldIndicesUDF'; http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/fdf70214/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java ---------------------------------------------------------------------- diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java index cba8e0f..0b687db 100644 --- a/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java +++ b/spark/spark-common/src/main/java/hivemall/ftvec/AddFeatureIndexUDFWrapper.java @@ -41,7 +41,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn */ @Description( name = "add_feature_index", - value = "_FUNC_(dense features in array<double>) - Returns a feature vector with feature indicies") + value = "_FUNC_(dense features in array<double>) - Returns a feature vector with feature indices") @UDFType(deterministic = true, stateful = false) public class AddFeatureIndexUDFWrapper extends GenericUDF { private AddFeatureIndexUDF udf = new AddFeatureIndexUDF();