Add license headers and reformat code
Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/ad81b3aa Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/ad81b3aa Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/ad81b3aa Branch: refs/heads/JIRA-22/pr-385 Commit: ad81b3aa5a0bbb7c248d127ba44608578c01ae00 Parents: 1ab9b09 Author: amaya <g...@sapphire.in.net> Authored: Tue Sep 20 17:05:55 2016 +0900 Committer: amaya <g...@sapphire.in.net> Committed: Tue Sep 20 18:37:51 2016 +0900 ---------------------------------------------------------------------- .../hivemall/ftvec/selection/ChiSquareUDF.java | 92 ++++++++++++-------- .../tools/array/ArrayTopKIndicesUDF.java | 29 ++++-- .../tools/array/SubarrayByIndicesUDF.java | 36 ++++++-- .../tools/matrix/TransposeAndDotUDAF.java | 64 +++++++++----- .../java/hivemall/utils/hadoop/HiveUtils.java | 10 ++- .../java/hivemall/utils/math/StatsUtils.java | 29 +++--- 6 files changed, 171 insertions(+), 89 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad81b3aa/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java index 1954e33..e2b7494 100644 --- a/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java +++ b/core/src/main/java/hivemall/ftvec/selection/ChiSquareUDF.java @@ -1,3 +1,21 @@ +/* + * Hivemall: Hive scalable Machine Learning Library + * + * Copyright (C) 2016 Makoto YUI + * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package hivemall.ftvec.selection; import hivemall.utils.hadoop.HiveUtils; @@ -10,24 +28,20 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import javax.annotation.Nonnull; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; @Description(name = "chi2", - value = "_FUNC_(array<array<number>> observed, array<array<number>> expected)" + - " - Returns chi2_val and p_val of each columns as <array<double>, array<double>>") + value = "_FUNC_(array<array<number>> observed, array<array<number>> expected)" + + " - Returns chi2_val and p_val of each columns as <array<double>, array<double>>") public class ChiSquareUDF extends GenericUDF { private ListObjectInspector observedOI; private ListObjectInspector observedRowOI; @@ 
-42,31 +56,31 @@ public class ChiSquareUDF extends GenericUDF { throw new UDFArgumentLengthException("Specify two arguments."); } - if (!HiveUtils.isNumberListListOI(OIs[0])){ - throw new UDFArgumentTypeException(0, "Only array<array<number>> type argument is acceptable but " - + OIs[0].getTypeName() + " was passed as `observed`"); + if (!HiveUtils.isNumberListListOI(OIs[0])) { + throw new UDFArgumentTypeException(0, + "Only array<array<number>> type argument is acceptable but " + OIs[0].getTypeName() + + " was passed as `observed`"); } - if (!HiveUtils.isNumberListListOI(OIs[1])){ - throw new UDFArgumentTypeException(1, "Only array<array<number>> type argument is acceptable but " - + OIs[1].getTypeName() + " was passed as `expected`"); + if (!HiveUtils.isNumberListListOI(OIs[1])) { + throw new UDFArgumentTypeException(1, + "Only array<array<number>> type argument is acceptable but " + OIs[1].getTypeName() + + " was passed as `expected`"); } observedOI = HiveUtils.asListOI(OIs[1]); - observedRowOI=HiveUtils.asListOI(observedOI.getListElementObjectInspector()); - observedElOI = HiveUtils.asDoubleCompatibleOI( observedRowOI.getListElementObjectInspector()); - expectedOI = HiveUtils.asListOI(OIs[0]); - expectedRowOI=HiveUtils.asListOI(expectedOI.getListElementObjectInspector()); + observedRowOI = HiveUtils.asListOI(observedOI.getListElementObjectInspector()); + observedElOI = HiveUtils.asDoubleCompatibleOI(observedRowOI.getListElementObjectInspector()); + expectedOI = HiveUtils.asListOI(OIs[0]); + expectedRowOI = HiveUtils.asListOI(expectedOI.getListElementObjectInspector()); expectedElOI = HiveUtils.asDoubleCompatibleOI(expectedRowOI.getListElementObjectInspector()); List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( - PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); - fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector( - 
PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); return ObjectInspectorFactory.getStandardStructObjectInspector( - Arrays.asList("chi2_vals", "p_vals"), fieldOIs); + Arrays.asList("chi2_vals", "p_vals"), fieldOIs); } @Override @@ -76,40 +90,44 @@ public class ChiSquareUDF extends GenericUDF { Preconditions.checkNotNull(observedObj); Preconditions.checkNotNull(expectedObj); - final int nClasses = observedObj.size(); + final int nClasses = observedObj.size(); Preconditions.checkArgument(nClasses == expectedObj.size()); // same #rows - int nFeatures=-1; - double[] observedRow=null; // to reuse - double[] expectedRow=null; // to reuse - double[][] observed =null; // shape = (#features, #classes) + int nFeatures = -1; + double[] observedRow = null; // to reuse + double[] expectedRow = null; // to reuse + double[][] observed = null; // shape = (#features, #classes) double[][] expected = null; // shape = (#features, #classes) // explode and transpose matrix - for(int i=0;i<nClasses;i++){ - if(i==0){ + for (int i = 0; i < nClasses; i++) { + if (i == 0) { // init - observedRow=HiveUtils.asDoubleArray(observedObj.get(i),observedRowOI,observedElOI,false); - expectedRow=HiveUtils.asDoubleArray(expectedObj.get(i),expectedRowOI,expectedElOI, false); + observedRow = HiveUtils.asDoubleArray(observedObj.get(i), observedRowOI, + observedElOI, false); + expectedRow = HiveUtils.asDoubleArray(expectedObj.get(i), expectedRowOI, + expectedElOI, false); nFeatures = observedRow.length; - observed=new double[nFeatures][nClasses]; + observed = new double[nFeatures][nClasses]; expected = new double[nFeatures][nClasses]; - }else{ - 
HiveUtils.toDoubleArray(observedObj.get(i),observedRowOI,observedElOI,observedRow,false); - HiveUtils.toDoubleArray(expectedObj.get(i),expectedRowOI,expectedElOI,expectedRow, false); + } else { + HiveUtils.toDoubleArray(observedObj.get(i), observedRowOI, observedElOI, + observedRow, false); + HiveUtils.toDoubleArray(expectedObj.get(i), expectedRowOI, expectedElOI, + expectedRow, false); } - for(int j=0;j<nFeatures;j++){ + for (int j = 0; j < nFeatures; j++) { observed[j][i] = observedRow[j]; expected[j][i] = expectedRow[j]; } } - final Map.Entry<double[],double[]> chi2 = StatsUtils.chiSquares(observed,expected); + final Map.Entry<double[], double[]> chi2 = StatsUtils.chiSquares(observed, expected); final Object[] result = new Object[2]; result[0] = WritableUtils.toWritableList(chi2.getKey()); - result[1]=WritableUtils.toWritableList(chi2.getValue()); + result[1] = WritableUtils.toWritableList(chi2.getValue()); return result; } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad81b3aa/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java b/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java index bf9fe15..f895f9b 100644 --- a/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java +++ b/core/src/main/java/hivemall/tools/array/ArrayTopKIndicesUDF.java @@ -1,3 +1,21 @@ +/* + * Hivemall: Hive scalable Machine Learning Library + * + * Copyright (C) 2016 Makoto YUI + * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package hivemall.tools.array; import hivemall.utils.hadoop.HiveUtils; @@ -22,7 +40,8 @@ import java.util.Comparator; import java.util.List; import java.util.Map; -@Description(name = "array_top_k_indices", +@Description( + name = "array_top_k_indices", value = "_FUNC_(array<number> array, const int k) - Returns indices array of top-k as array<int>") public class ArrayTopKIndicesUDF extends GenericUDF { private ListObjectInspector arrayOI; @@ -36,8 +55,9 @@ public class ArrayTopKIndicesUDF extends GenericUDF { } if (!HiveUtils.isNumberListOI(OIs[0])) { - throw new UDFArgumentTypeException(0, "Only array<number> type argument is acceptable but " - + OIs[0].getTypeName() + " was passed as `array`"); + throw new UDFArgumentTypeException(0, + "Only array<number> type argument is acceptable but " + OIs[0].getTypeName() + + " was passed as `array`"); } if (!HiveUtils.isIntegerOI(OIs[1])) { throw new UDFArgumentTypeException(1, "Only int type argument is acceptable but " @@ -48,8 +68,7 @@ public class ArrayTopKIndicesUDF extends GenericUDF { elementOI = HiveUtils.asDoubleCompatibleOI(arrayOI.getListElementObjectInspector()); kOI = HiveUtils.asIntegerOI(OIs[1]); - return ObjectInspectorFactory.getStandardListObjectInspector( - PrimitiveObjectInspectorFactory.writableIntObjectInspector); + return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableIntObjectInspector); } @Override 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad81b3aa/core/src/main/java/hivemall/tools/array/SubarrayByIndicesUDF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/array/SubarrayByIndicesUDF.java b/core/src/main/java/hivemall/tools/array/SubarrayByIndicesUDF.java index f476589..07e158a 100644 --- a/core/src/main/java/hivemall/tools/array/SubarrayByIndicesUDF.java +++ b/core/src/main/java/hivemall/tools/array/SubarrayByIndicesUDF.java @@ -1,6 +1,23 @@ +/* + * Hivemall: Hive scalable Machine Learning Library + * + * Copyright (C) 2016 Makoto YUI + * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package hivemall.tools.array; - import hivemall.utils.hadoop.HiveUtils; import hivemall.utils.lang.Preconditions; import org.apache.hadoop.hive.ql.exec.Description; @@ -21,8 +38,8 @@ import java.util.ArrayList; import java.util.List; @Description(name = "subarray_by_indices", - value = "_FUNC_(array<number> input, array<int> indices)" + - " - Returns subarray selected by given indices as array<number>") + value = "_FUNC_(array<number> input, array<int> indices)" + + " - Returns subarray selected by given indices as array<number>") public class SubarrayByIndicesUDF extends GenericUDF { private ListObjectInspector inputOI; private PrimitiveObjectInspector elementOI; @@ -36,13 +53,15 @@ public class SubarrayByIndicesUDF extends GenericUDF { } if (!HiveUtils.isListOI(OIs[0])) { - throw new UDFArgumentTypeException(0, "Only array<number> type argument is acceptable but " - + OIs[0].getTypeName() + " was passed as `input`"); + throw new UDFArgumentTypeException(0, + "Only array<number> type argument is acceptable but " + OIs[0].getTypeName() + + " was passed as `input`"); } if (!HiveUtils.isListOI(OIs[1]) || !HiveUtils.isIntegerOI(((ListObjectInspector) OIs[1]).getListElementObjectInspector())) { - throw new UDFArgumentTypeException(0, "Only array<int> type argument is acceptable but " - + OIs[0].getTypeName() + " was passed as `indices`"); + throw new UDFArgumentTypeException(0, + "Only array<int> type argument is acceptable but " + OIs[0].getTypeName() + + " was passed as `indices`"); } inputOI = HiveUtils.asListOI(OIs[0]); @@ -50,8 +69,7 @@ public class SubarrayByIndicesUDF extends GenericUDF { indicesOI = HiveUtils.asListOI(OIs[1]); indexOI = HiveUtils.asIntegerOI(indicesOI.getListElementObjectInspector()); - return ObjectInspectorFactory.getStandardListObjectInspector( - PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); } 
@Override http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad81b3aa/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java index 3dcbb93..1e54004 100644 --- a/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java +++ b/core/src/main/java/hivemall/tools/matrix/TransposeAndDotUDAF.java @@ -1,3 +1,21 @@ +/* + * Hivemall: Hive scalable Machine Learning Library + * + * Copyright (C) 2016 Makoto YUI + * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package hivemall.tools.matrix; import hivemall.utils.hadoop.HiveUtils; @@ -23,12 +41,14 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -@Description(name = "transpose_and_dot", - value = "_FUNC_(array<number> matrix0_row, array<number> matrix1_row)" + - " - Returns dot(matrix0.T, matrix1) as array<array<double>>, shape = (matrix0.#cols, matrix1.#cols)") +@Description( + name = "transpose_and_dot", + value = "_FUNC_(array<number> matrix0_row, array<number> matrix1_row)" + + " - Returns dot(matrix0.T, matrix1) as array<array<double>>, shape = (matrix0.#cols, matrix1.#cols)") public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver { @Override - public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info) throws SemanticException { + public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info) + throws SemanticException { ObjectInspector[] OIs = info.getParameterObjectInspectors(); if (OIs.length != 2) { @@ -36,13 +56,15 @@ public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver { } if (!HiveUtils.isNumberListOI(OIs[0])) { - throw new UDFArgumentTypeException(0, "Only array<number> type argument is acceptable but " - + OIs[0].getTypeName() + " was passed as `matrix0_row`"); + throw new UDFArgumentTypeException(0, + "Only array<number> type argument is acceptable but " + OIs[0].getTypeName() + + " was passed as `matrix0_row`"); } if (!HiveUtils.isNumberListOI(OIs[1])) { - throw new UDFArgumentTypeException(1, "Only array<number> type argument is acceptable but " - + OIs[1].getTypeName() + " was passed as `matrix1_row`"); + throw new UDFArgumentTypeException(1, + "Only array<number> type argument is acceptable but " + OIs[1].getTypeName() + + " was passed as `matrix1_row`"); } return new TransposeAndDotUDAFEvaluator(); @@ -69,9 +91,7 @@ public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver { @Override public int estimate() { - return aggMatrix != null - ? 
aggMatrix.length * aggMatrix[0].length * 8 - : 0; + return aggMatrix != null ? aggMatrix.length * aggMatrix[0].length * 8 : 0; } public void init(int n, int m) { @@ -92,19 +112,17 @@ public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver { super.init(mode, OIs); if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { - matrix0RowOI = HiveUtils.asListOI( OIs[0]); + matrix0RowOI = HiveUtils.asListOI(OIs[0]); matrix0ElOI = HiveUtils.asDoubleCompatibleOI(matrix0RowOI.getListElementObjectInspector()); matrix1RowOI = HiveUtils.asListOI(OIs[1]); matrix1ElOI = HiveUtils.asDoubleCompatibleOI(matrix1RowOI.getListElementObjectInspector()); } else { - aggMatrixOI = HiveUtils.asListOI( OIs[0]); - aggMatrixRowOI = HiveUtils.asListOI(aggMatrixOI.getListElementObjectInspector()); + aggMatrixOI = HiveUtils.asListOI(OIs[0]); + aggMatrixRowOI = HiveUtils.asListOI(aggMatrixOI.getListElementObjectInspector()); aggMatrixElOI = HiveUtils.asDoubleOI(aggMatrixRowOI.getListElementObjectInspector()); } - return ObjectInspectorFactory.getStandardListObjectInspector( - ObjectInspectorFactory.getStandardListObjectInspector( - PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); + return ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)); } @Override @@ -124,11 +142,11 @@ public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver { public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { TransposeAndDotAggregationBuffer myAgg = (TransposeAndDotAggregationBuffer) agg; - if(matrix0Row==null){ - matrix0Row=new double[matrix0RowOI.getListLength(parameters[0])]; + if (matrix0Row == null) { + matrix0Row = new double[matrix0RowOI.getListLength(parameters[0])]; } - if(matrix1Row==null){ - matrix1Row=new double[matrix1RowOI.getListLength(parameters[1])]; + if (matrix1Row == null) { + matrix1Row = new 
double[matrix1RowOI.getListLength(parameters[1])]; } HiveUtils.toDoubleArray(parameters[0], matrix0RowOI, matrix0ElOI, matrix0Row, false); @@ -158,9 +176,9 @@ public final class TransposeAndDotUDAF extends AbstractGenericUDAFResolver { List matrix = aggMatrixOI.getList(other); final int n = matrix.size(); - final double[] row =new double[ aggMatrixRowOI.getListLength(matrix.get(0))]; + final double[] row = new double[aggMatrixRowOI.getListLength(matrix.get(0))]; for (int i = 0; i < n; i++) { - HiveUtils.toDoubleArray(matrix.get(i), aggMatrixRowOI, aggMatrixElOI,row,false); + HiveUtils.toDoubleArray(matrix.get(i), aggMatrixRowOI, aggMatrixElOI, row, false); if (myAgg.aggMatrix == null) { myAgg.init(n, row.length); http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad81b3aa/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java b/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java index dcbf534..9272e60 100644 --- a/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java +++ b/core/src/main/java/hivemall/utils/hadoop/HiveUtils.java @@ -1,7 +1,7 @@ /* * Hivemall: Hive scalable Machine Learning Library * - * Copyright (C) 2015 Makoto YUI + * Copyright (C) 2016 Makoto YUI * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST) * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -231,12 +231,14 @@ public final class HiveUtils { return category == Category.LIST; } - public static boolean isNumberListOI(@Nonnull final ObjectInspector oi){ - return isListOI(oi) && isNumberOI(((ListObjectInspector)oi).getListElementObjectInspector()); + public static boolean isNumberListOI(@Nonnull final ObjectInspector oi) { + return isListOI(oi) + && isNumberOI(((ListObjectInspector) oi).getListElementObjectInspector()); } public static boolean isNumberListListOI(@Nonnull 
final ObjectInspector oi) { - return isListOI(oi) && isNumberListOI(((ListObjectInspector)oi).getListElementObjectInspector()); + return isListOI(oi) + && isNumberListOI(((ListObjectInspector) oi).getListElementObjectInspector()); } public static boolean isPrimitiveTypeInfo(@Nonnull TypeInfo typeInfo) { http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/ad81b3aa/core/src/main/java/hivemall/utils/math/StatsUtils.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/hivemall/utils/math/StatsUtils.java b/core/src/main/java/hivemall/utils/math/StatsUtils.java index f9d0f30..d3b25c7 100644 --- a/core/src/main/java/hivemall/utils/math/StatsUtils.java +++ b/core/src/main/java/hivemall/utils/math/StatsUtils.java @@ -1,7 +1,7 @@ /* * Hivemall: Hive scalable Machine Learning Library * - * Copyright (C) 2015 Makoto YUI + * Copyright (C) 2016 Makoto YUI * Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST) * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -198,7 +198,8 @@ public final class StatsUtils { * @param expected mean vector whose value is expected * @return chi2 value */ - public static double chiSquare(@Nonnull final double[] observed, @Nonnull final double[] expected) { + public static double chiSquare(@Nonnull final double[] observed, + @Nonnull final double[] expected) { Preconditions.checkArgument(observed.length == expected.length); double sumObserved = 0.d; @@ -237,32 +238,38 @@ public final class StatsUtils { * @param expected means vector whose value is expected * @return p value */ - public static double chiSquareTest(@Nonnull final double[] observed, @Nonnull final double[] expected) { - ChiSquaredDistribution distribution = new ChiSquaredDistribution(null, (double)expected.length - 1.d); - return 1.d - distribution.cumulativeProbability(chiSquare(observed,expected)); + public static double chiSquareTest(@Nonnull final double[] 
observed, + @Nonnull final double[] expected) { + ChiSquaredDistribution distribution = new ChiSquaredDistribution(null, + (double) expected.length - 1.d); + return 1.d - distribution.cumulativeProbability(chiSquare(observed, expected)); } /** - * This method offers effective calculation for multiple entries rather than calculation individually + * This method offers effective calculation for multiple entries rather than calculation + * individually + * * @param observeds means matrix whose values are observed * @param expecteds means matrix * @return (chi2 value[], p value[]) */ - public static Map.Entry<double[],double[]> chiSquares(@Nonnull final double[][] observeds, @Nonnull final double[][] expecteds){ + public static Map.Entry<double[], double[]> chiSquares(@Nonnull final double[][] observeds, + @Nonnull final double[][] expecteds) { Preconditions.checkArgument(observeds.length == expecteds.length); final int len = expecteds.length; final int lenOfEach = expecteds[0].length; - final ChiSquaredDistribution distribution = new ChiSquaredDistribution(null, (double)lenOfEach - 1.d); + final ChiSquaredDistribution distribution = new ChiSquaredDistribution(null, + (double) lenOfEach - 1.d); final double[] chi2s = new double[len]; final double[] ps = new double[len]; - for(int i=0;i<len;i++){ - chi2s[i] = chiSquare(observeds[i],expecteds[i]); + for (int i = 0; i < len; i++) { + chi2s[i] = chiSquare(observeds[i], expecteds[i]); ps[i] = 1.d - distribution.cumulativeProbability(chi2s[i]); } - return new AbstractMap.SimpleEntry<double[], double[]>(chi2s,ps); + return new AbstractMap.SimpleEntry<double[], double[]>(chi2s, ps); } }