Repository: incubator-hivemall Updated Branches: refs/heads/master 448847fa2 -> 3a718713a
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java ---------------------------------------------------------------------- diff --git a/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java b/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java deleted file mode 100644 index cf10ed7..0000000 --- a/spark/spark-common/src/main/java/hivemall/dataset/LogisticRegressionDataGeneratorUDTFWrapper.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
package hivemall.dataset;

import hivemall.UDTFWithOptions;

import java.lang.reflect.Field;
import java.lang.reflect.Method;
import java.util.Random;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.Collector;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

/**
 * A wrapper of {@link hivemall.dataset.LogisticRegressionDataGeneratorUDTF}. This wrapper is
 * needed because Spark cannot handle HadoopUtils#getTaskId() correctly.
 *
 * All calls are delegated to the wrapped UDTF; the protected option-handling hooks are reached
 * via reflection because they are not visible from this package.
 */
@Description(name = "lr_datagen",
        value = "_FUNC_(options string) - Generates a logistic regression dataset")
public final class LogisticRegressionDataGeneratorUDTFWrapper extends UDTFWithOptions {
    private transient LogisticRegressionDataGeneratorUDTF udtf =
            new LogisticRegressionDataGeneratorUDTF();

    /**
     * Delegates to the wrapped UDTF's protected {@code getOptions()} via reflection.
     *
     * @return the command-line options supported by the wrapped UDTF
     */
    @Override
    protected Options getOptions() {
        try {
            Method m = udtf.getClass().getDeclaredMethod("getOptions");
            m.setAccessible(true);
            return (Options) m.invoke(udtf);
        } catch (Exception e) {
            // Fail fast with the cause instead of swallowing the error and returning null,
            // which would surface later as an unrelated NullPointerException.
            throw new IllegalStateException("Failed to invoke getOptions() reflectively", e);
        }
    }

    /**
     * Delegates to the wrapped UDTF's protected {@code processOptions(ObjectInspector[])}.
     *
     * @param objectInspectors inspectors for the UDTF arguments
     * @return the parsed command line
     * @throws UDFArgumentException if the reflective call fails or option parsing fails
     */
    @SuppressWarnings("all")
    @Override
    protected CommandLine processOptions(ObjectInspector[] objectInspectors)
            throws UDFArgumentException {
        try {
            // BUG FIX: the parameter type must be supplied to getDeclaredMethod(); the
            // original zero-arg lookup always threw NoSuchMethodException (then swallowed it).
            Method m = udtf.getClass().getDeclaredMethod("processOptions",
                ObjectInspector[].class);
            m.setAccessible(true);
            // BUG FIX: cast to Object so the array is passed as ONE argument rather than
            // being spread across the varargs of Method#invoke.
            return (CommandLine) m.invoke(udtf, (Object) objectInspectors);
        } catch (Exception e) {
            throw new UDFArgumentException(e);
        }
    }

    /**
     * Wires this wrapper's collector into the delegate and re-seeds its RNG fields so that
     * HadoopUtils#getTaskId() is never consulted, then initializes the delegate.
     *
     * @param argOIs inspectors for the UDTF arguments
     * @return the output struct inspector of the wrapped UDTF
     * @throws UDFArgumentException if reflection fails or delegate initialization fails
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        try {
            // Extract a collector for LogisticRegressionDataGeneratorUDTF
            Field collector = GenericUDTF.class.getDeclaredField("collector");
            collector.setAccessible(true);
            udtf.setCollector((Collector) collector.get(this));

            // To avoid HadoopUtils#getTaskId(): derive per-thread seeds locally.
            Class<?> clazz = udtf.getClass();
            Field rnd1 = clazz.getDeclaredField("rnd1");
            Field rnd2 = clazz.getDeclaredField("rnd2");
            Field r_seed = clazz.getDeclaredField("r_seed");
            r_seed.setAccessible(true);
            final long seed = r_seed.getLong(udtf) + (int) Thread.currentThread().getId();
            rnd1.setAccessible(true);
            rnd2.setAccessible(true);
            rnd1.set(udtf, new Random(seed));
            rnd2.set(udtf, new Random(seed + 1));
        } catch (Exception e) {
            // Propagate instead of silently continuing with a half-initialized delegate.
            throw new UDFArgumentException(e);
        }
        return udtf.initialize(argOIs);
    }

    @Override
    public void process(Object[] objects) throws HiveException {
        udtf.process(objects);
    }

    @Override
    public void close() throws HiveException {
        udtf.close();
    }
}
package hivemall.ftvec;

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;

/**
 * A wrapper of {@link hivemall.ftvec.AddBiasUDF}.
 *
 * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle
 * List&lt;&gt; as a return type in Hive UDF, so the return type must be passed via
 * ObjectInspector.
 */
@Description(name = "add_bias",
        value = "_FUNC_(features in array<string>) - Returns features with a bias as array<string>")
@UDFType(deterministic = true, stateful = false)
public class AddBiasUDFWrapper extends GenericUDF {
    private AddBiasUDF udf = new AddBiasUDF();
    private ListObjectInspector argumentOI = null;

    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 1) {
            throw new UDFArgumentLengthException(
                "add_bias() has an single arguments: array<string> features");
        }

        // Guard-clause validation: the sole argument must be array<string>.
        if (arguments[0].getCategory() != Category.LIST) {
            throw new UDFArgumentTypeException(0, "Type mismatch: features");
        }
        argumentOI = (ListObjectInspector) arguments[0];
        final ObjectInspector elementOI = argumentOI.getListElementObjectInspector();
        final boolean stringElements = elementOI.getCategory() == Category.PRIMITIVE
                && ((PrimitiveObjectInspector) elementOI).getPrimitiveCategory() == PrimitiveCategory.STRING;
        if (!stringElements) {
            throw new UDFArgumentTypeException(0, "Type mismatch: features");
        }

        return ObjectInspectorFactory.getStandardListObjectInspector(elementOI);
    }

    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        assert (arguments.length == 1);
        final Object raw = arguments[0].get();
        @SuppressWarnings("unchecked")
        final List<String> features = (List<String>) argumentOI.getList(raw);
        return udf.evaluate(features);
    }

    @Override
    public String getDisplayString(String[] children) {
        return "add_bias(" + Arrays.toString(children) + ")";
    }
}
package hivemall.ftvec;

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * A wrapper of {@link hivemall.ftvec.AddFeatureIndexUDF}.
 *
 * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle
 * List&lt;&gt; as a return type in Hive UDF, so the return type must be passed via
 * ObjectInspector.
 */
@Description(
        name = "add_feature_index",
        value = "_FUNC_(dense features in array<double>) - Returns a feature vector with feature indices")
@UDFType(deterministic = true, stateful = false)
public class AddFeatureIndexUDFWrapper extends GenericUDF {
    private AddFeatureIndexUDF udf = new AddFeatureIndexUDF();
    private ListObjectInspector argumentOI = null;

    /** Returns true when {@code oi} inspects a list whose elements are primitive doubles. */
    private static boolean isDoubleListOI(ObjectInspector oi) {
        if (oi.getCategory() != Category.LIST) {
            return false;
        }
        ObjectInspector element = ((ListObjectInspector) oi).getListElementObjectInspector();
        return element.getCategory() == Category.PRIMITIVE
                && ((PrimitiveObjectInspector) element).getPrimitiveCategory() == PrimitiveCategory.DOUBLE;
    }

    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 1) {
            throw new UDFArgumentLengthException(
                "add_feature_index() has an single arguments: array<double> features");
        }

        if (!isDoubleListOI(arguments[0])) {
            throw new UDFArgumentTypeException(0, "Type mismatch: features");
        }
        argumentOI = (ListObjectInspector) arguments[0];

        return ObjectInspectorFactory.getStandardListObjectInspector(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }

    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        assert (arguments.length == 1);
        @SuppressWarnings("unchecked")
        final List<Double> denseFeatures = (List<Double>) argumentOI.getList(arguments[0].get());
        return udf.evaluate(denseFeatures);
    }

    @Override
    public String getDisplayString(String[] children) {
        return "add_feature_index(" + Arrays.toString(children) + ")";
    }
}
package hivemall.ftvec;

import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * A wrapper of {@link hivemall.ftvec.ExtractFeatureUDF}.
 *
 * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle
 * List&lt;&gt; as a return type in Hive UDF, so the return type must be passed via
 * ObjectInspector.
 */
@Description(name = "extract_feature",
        value = "_FUNC_(feature in string) - Returns a parsed feature as string")
@UDFType(deterministic = true, stateful = false)
public class ExtractFeatureUDFWrapper extends GenericUDF {
    private ExtractFeatureUDF udf = new ExtractFeatureUDF();
    private PrimitiveObjectInspector argumentOI = null;

    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 1) {
            throw new UDFArgumentLengthException(
                "extract_feature() has an single arguments: string feature");
        }

        // The single argument must be a string primitive.
        final PrimitiveObjectInspector primOI = (PrimitiveObjectInspector) arguments[0];
        if (primOI.getPrimitiveCategory() != PrimitiveCategory.STRING) {
            throw new UDFArgumentTypeException(0, "Type mismatch: feature");
        }
        argumentOI = primOI;

        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    }

    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        assert (arguments.length == 1);
        final Object raw = arguments[0].get();
        final String feature = (String) argumentOI.getPrimitiveJavaObject(raw);
        return udf.evaluate(feature);
    }

    @Override
    public String getDisplayString(String[] children) {
        return "extract_feature(" + Arrays.toString(children) + ")";
    }
}
package hivemall.ftvec;

import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * A wrapper of {@link hivemall.ftvec.ExtractWeightUDF}.
 *
 * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle
 * List&lt;&gt; as a return type in Hive UDF, so the return type must be passed via
 * ObjectInspector.
 */
// FIX: the description previously said "as string", but initialize() declares a DOUBLE
// writable ObjectInspector as the return type, so the function actually returns a double.
@Description(name = "extract_weight",
        value = "_FUNC_(feature in string) - Returns the weight of a feature as double")
@UDFType(deterministic = true, stateful = false)
public class ExtractWeightUDFWrapper extends GenericUDF {
    private ExtractWeightUDF udf = new ExtractWeightUDF();
    private PrimitiveObjectInspector argumentOI = null;

    /**
     * Validates that the single argument is a string and declares a writable double return.
     *
     * @param arguments inspectors for the UDF arguments; exactly one string is expected
     * @return a writable double ObjectInspector
     * @throws UDFArgumentException if the argument count or type is wrong
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 1) {
            // FIX: corrected the previously ungrammatical message ("has an single arguments").
            throw new UDFArgumentLengthException(
                "extract_weight() has a single argument: string feature");
        }

        argumentOI = (PrimitiveObjectInspector) arguments[0];
        if (argumentOI.getPrimitiveCategory() != PrimitiveCategory.STRING) {
            throw new UDFArgumentTypeException(0, "Type mismatch: feature");
        }

        return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.DOUBLE);
    }

    /** Delegates the parsed feature string to the wrapped UDF. */
    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        assert (arguments.length == 1);
        final String input = (String) argumentOI.getPrimitiveJavaObject(arguments[0].get());
        return udf.evaluate(input);
    }

    @Override
    public String getDisplayString(String[] children) {
        return "extract_weight(" + Arrays.toString(children) + ")";
    }
}
package hivemall.ftvec;

import java.util.Arrays;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;

/**
 * A wrapper of {@link hivemall.ftvec.SortByFeatureUDF}.
 *
 * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark cannot handle
 * Map&lt;&gt; as a return type in Hive UDF, so the return type must be passed via
 * ObjectInspector.
 */
@Description(name = "sort_by_feature",
        value = "_FUNC_(map in map<int,float>) - Returns a sorted map")
@UDFType(deterministic = true, stateful = false)
public class SortByFeatureUDFWrapper extends GenericUDF {
    private SortByFeatureUDF udf = new SortByFeatureUDF();
    private MapObjectInspector argumentOI = null;

    /**
     * Validates that the single argument is a map&lt;int,float&gt; and declares the same
     * (standard) map type as the return.
     *
     * @param arguments inspectors for the UDF arguments; exactly one map<int,float> expected
     * @return a standard map ObjectInspector mirroring the input key/value inspectors
     * @throws UDFArgumentException if the argument count or type is wrong
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 1) {
            // FIX: the message previously said "sorted_by_feature()", which does not match
            // the registered function name "sort_by_feature".
            throw new UDFArgumentLengthException(
                "sort_by_feature() has an single arguments: map<int, float> map");
        }

        switch (arguments[0].getCategory()) {
            case MAP:
                argumentOI = (MapObjectInspector) arguments[0];
                ObjectInspector keyOI = argumentOI.getMapKeyObjectInspector();
                ObjectInspector valueOI = argumentOI.getMapValueObjectInspector();
                if (keyOI.getCategory().equals(Category.PRIMITIVE)
                        && valueOI.getCategory().equals(Category.PRIMITIVE)) {
                    final PrimitiveCategory keyCategory = ((PrimitiveObjectInspector) keyOI).getPrimitiveCategory();
                    final PrimitiveCategory valueCategory = ((PrimitiveObjectInspector) valueOI).getPrimitiveCategory();
                    if (keyCategory == PrimitiveCategory.INT
                            && valueCategory == PrimitiveCategory.FLOAT) {
                        break; // valid map<int,float>; anything else falls through and throws
                    }
                }
                // intentional fall through
            default:
                throw new UDFArgumentTypeException(0, "Type mismatch: map");
        }

        return ObjectInspectorFactory.getStandardMapObjectInspector(
            argumentOI.getMapKeyObjectInspector(), argumentOI.getMapValueObjectInspector());
    }

    /** Delegates the deserialized map to the wrapped UDF, which returns it sorted by key. */
    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        assert (arguments.length == 1);
        @SuppressWarnings("unchecked")
        final Map<IntWritable, FloatWritable> input =
                (Map<IntWritable, FloatWritable>) argumentOI.getMap(arguments[0].get());
        return udf.evaluate(input);
    }

    @Override
    public String getDisplayString(String[] children) {
        return "sort_by_feature(" + Arrays.toString(children) + ")";
    }
}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java ---------------------------------------------------------------------- diff --git a/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java b/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java deleted file mode 100644 index db533be..0000000 --- a/spark/spark-common/src/main/java/hivemall/ftvec/scaling/L2NormalizationUDFWrapper.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
package hivemall.ftvec.scaling;

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

/**
 * A wrapper of {@link hivemall.ftvec.scaling.L2NormalizationUDF}.
 *
 * NOTE: This is needed to avoid the issue of Spark reflection. That is, spark-1.3 cannot handle
 * List&lt;&gt; as a return type in Hive UDF. The type must be passed via ObjectInspector. This
 * issue has been reported in SPARK-6747, so a future release of Spark makes the wrapper
 * obsolete.
 */
// FIX: added @Description/@UDFType for consistency with the other UDF wrappers in this module.
@Description(name = "normalize",
        value = "_FUNC_(features in array<string>) - Returns an L2-normalized feature vector")
@UDFType(deterministic = true, stateful = false)
public class L2NormalizationUDFWrapper extends GenericUDF {
    private L2NormalizationUDF udf = new L2NormalizationUDF();

    // Converts the incoming list argument into a List<Text> for the wrapped UDF.
    private transient Converter toListText = null;

    /**
     * Validates that the single argument is array&lt;string&gt;, prepares a converter to
     * List&lt;Text&gt;, and declares a standard list-of-string return type.
     *
     * @param arguments inspectors for the UDF arguments; exactly one array<string> expected
     * @return a standard list ObjectInspector over java strings
     * @throws UDFArgumentException if the argument count or type is wrong
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 1) {
            throw new UDFArgumentLengthException("normalize() has an only single argument.");
        }

        switch (arguments[0].getCategory()) {
            case LIST:
                ObjectInspector elmOI = ((ListObjectInspector) arguments[0]).getListElementObjectInspector();
                if (elmOI.getCategory().equals(Category.PRIMITIVE)) {
                    if (((PrimitiveObjectInspector) elmOI).getPrimitiveCategory() == PrimitiveCategory.STRING) {
                        break;
                    }
                }
                // intentional fall through
            default:
                throw new UDFArgumentTypeException(0,
                    "normalize() must have List[String] as an argument, but "
                            + arguments[0].getTypeName() + " was found.");
        }

        // Create a ObjectInspector converter for arguments
        ObjectInspector outputElemOI = ObjectInspectorFactory.getReflectionObjectInspector(
            Text.class, ObjectInspectorOptions.JAVA);
        ObjectInspector outputOI = ObjectInspectorFactory.getStandardListObjectInspector(outputElemOI);
        toListText = ObjectInspectorConverters.getConverter(arguments[0], outputOI);

        ObjectInspector listElemOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
        ObjectInspector returnElemOI = ObjectInspectorUtils.getStandardObjectInspector(listElemOI);
        return ObjectInspectorFactory.getStandardListObjectInspector(returnElemOI);
    }

    /**
     * Converts the argument to List&lt;Text&gt; and delegates to the wrapped UDF.
     *
     * FIX: the result is now returned directly; the previous implementation cached it in a
     * mutable instance field ({@code retValue}) for no benefit, which made the UDF needlessly
     * stateful and unsafe if evaluate() were ever invoked concurrently.
     */
    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        assert (arguments.length == 1);
        @SuppressWarnings("unchecked")
        final List<Text> input = (List<Text>) toListText.convert(arguments[0].get());
        return udf.evaluate(input);
    }

    @Override
    public String getDisplayString(String[] children) {
        return "normalize(" + Arrays.toString(children) + ")";
    }
}
package hivemall.knn.lsh;

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

/** A wrapper of {@link hivemall.knn.lsh.MinHashesUDF}. */
@Description(
        name = "minhashes",
        value = "_FUNC_(features in array<string>, noWeight in boolean) - Returns hashed features as array<int>")
@UDFType(deterministic = true, stateful = false)
public class MinHashesUDFWrapper extends GenericUDF {
    private MinHashesUDF udf = new MinHashesUDF();
    private ListObjectInspector featuresOI = null;
    private PrimitiveObjectInspector noWeightOI = null;

    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 2) {
            throw new UDFArgumentLengthException(
                "minhashes() has 2 arguments: array<string> features, boolean noWeight");
        }

        // First argument: must be array<string> (guard-clause style).
        if (arguments[0].getCategory() != Category.LIST) {
            throw new UDFArgumentTypeException(0, "Type mismatch: features");
        }
        featuresOI = (ListObjectInspector) arguments[0];
        final ObjectInspector elementOI = featuresOI.getListElementObjectInspector();
        final boolean stringElements = elementOI.getCategory() == Category.PRIMITIVE
                && ((PrimitiveObjectInspector) elementOI).getPrimitiveCategory() == PrimitiveCategory.STRING;
        if (!stringElements) {
            throw new UDFArgumentTypeException(0, "Type mismatch: features");
        }

        // Second argument: must be a boolean primitive.
        noWeightOI = (PrimitiveObjectInspector) arguments[1];
        if (noWeightOI.getPrimitiveCategory() != PrimitiveCategory.BOOLEAN) {
            throw new UDFArgumentException("Type mismatch: noWeight");
        }

        return ObjectInspectorFactory.getStandardListObjectInspector(
            PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT));
    }

    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        assert (arguments.length == 2);
        @SuppressWarnings("unchecked")
        final List<String> features = (List<String>) featuresOI.getList(arguments[0].get());
        final boolean noWeight =
                PrimitiveObjectInspectorUtils.getBoolean(arguments[1].get(), noWeightOI);
        return udf.evaluate(features, noWeight);
    }

    @Override
    public String getDisplayString(String[] children) {
        /**
         * TODO: Need to return hive-specific type names.
         */
        return "minhashes(" + Arrays.toString(children) + ")";
    }
}
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall.tools.mapred; - -import java.util.UUID; - -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDFArgumentException; -import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.udf.UDFType; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; - -/** An alternative implementation of [[hivemall.tools.mapred.RowIdUDF]]. */ -@Description( - name = "rowid", - value = "_FUNC_() - Returns a generated row id of a form {TASK_ID}-{UUID}-{SEQUENCE_NUMBER}") -@UDFType(deterministic = false, stateful = true) -public class RowIdUDFWrapper extends GenericUDF { - // RowIdUDF is directly used because spark cannot - // handle HadoopUtils#getTaskId(). 
- - private long sequence; - private long taskId; - - public RowIdUDFWrapper() { - this.sequence = 0L; - this.taskId = Thread.currentThread().getId(); - } - - @Override - public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 0) { - throw new UDFArgumentLengthException("row_number() has no argument."); - } - - return PrimitiveObjectInspectorFactory.javaStringObjectInspector; - } - - @Override - public Object evaluate(DeferredObject[] arguments) throws HiveException { - assert (arguments.length == 0); - sequence++; - /** - * TODO: Check if it is unique over all tasks in executors of Spark. - */ - return taskId + "-" + UUID.randomUUID() + "-" + sequence; - } - - @Override - public String getDisplayString(String[] children) { - return "row_number()"; - } -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/scala/hivemall/HivemallException.scala ---------------------------------------------------------------------- diff --git a/spark/spark-common/src/main/scala/hivemall/HivemallException.scala b/spark/spark-common/src/main/scala/hivemall/HivemallException.scala deleted file mode 100644 index 53f6756..0000000 --- a/spark/spark-common/src/main/scala/hivemall/HivemallException.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package hivemall - -class HivemallException(message: String, cause: Throwable) - extends Exception(message, cause) { - - def this(message: String) = this(message, null) -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala ---------------------------------------------------------------------- diff --git a/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala b/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala deleted file mode 100644 index 3fb2d18..0000000 --- a/spark/spark-common/src/main/scala/org/apache/spark/ml/feature/HivemallLabeledPoint.scala +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.spark.ml.feature - -import java.util.StringTokenizer - -import scala.collection.mutable.ListBuffer - -import hivemall.HivemallException - -// Used for DataFrame#explode -case class HivemallFeature(feature: String) - -/** - * Class that represents the features and labels of a data point for Hivemall. - * - * @param label Label for this data point. - * @param features List of features for this data point. - */ -case class HivemallLabeledPoint(label: Float = 0.0f, features: Seq[String]) { - override def toString: String = { - "%s,%s".format(label, features.mkString("[", ",", "]")) - } -} - -object HivemallLabeledPoint { - - // Simple parser for HivemallLabeledPoint - def parse(s: String): HivemallLabeledPoint = { - val (label, features) = s.indexOf(',') match { - case d if d > 0 => (s.substring(0, d), s.substring(d + 1)) - case _ => ("0.0", "[]") // Dummy - } - HivemallLabeledPoint(label.toFloat, parseTuple(new StringTokenizer(features, "[],", true))) - } - - // TODO: Support to parse rows without labels - private[this] def parseTuple(tokenizer: StringTokenizer): Seq[String] = { - val items = ListBuffer.empty[String] - var parsing = true - var allowDelim = false - while (parsing && tokenizer.hasMoreTokens()) { - val token = tokenizer.nextToken() - if (token == "[") { - items ++= parseTuple(tokenizer) - parsing = false - allowDelim = true - } else if (token == ",") { - if (allowDelim) { - allowDelim = false - } else { - throw new HivemallException("Found ',' at a wrong position.") - } - } else if (token == "]") { - parsing = false - } else { - items.append(token) - allowDelim = true - } - } - if (parsing) { - throw new HivemallException(s"A tuple must end with ']'.") - } - items - } -} 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/spark/spark-common/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala ---------------------------------------------------------------------- diff --git a/spark/spark-common/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala b/spark/spark-common/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala deleted file mode 100644 index a6bbb4b..0000000 --- a/spark/spark-common/src/main/scala/org/apache/spark/streaming/HivemallStreamingOps.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.spark.streaming - -import scala.reflect.ClassTag - -import org.apache.spark.ml.feature.HivemallLabeledPoint -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, Row, SQLContext} -import org.apache.spark.streaming.dstream.DStream - -final class HivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) { - - def predict[U: ClassTag](f: DataFrame => DataFrame)(implicit sqlContext: SQLContext) - : DStream[Row] = { - ds.transform[Row] { rdd: RDD[HivemallLabeledPoint] => - f(sqlContext.createDataFrame(rdd)).rdd - } - } -} - -object HivemallStreamingOps { - - /** - * Implicitly inject the [[HivemallStreamingOps]] into [[DStream]]. - */ - implicit def dataFrameToHivemallStreamingOps(ds: DStream[HivemallLabeledPoint]) - : HivemallStreamingOps = { - new HivemallStreamingOps(ds) - } -} http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/src/site/resources/LICENSE-font_awesome.txt ---------------------------------------------------------------------- diff --git a/src/site/resources/LICENSE-font_awesome.txt b/src/site/resources/LICENSE-font_awesome.txt new file mode 100644 index 0000000..ad1f9ac --- /dev/null +++ b/src/site/resources/LICENSE-font_awesome.txt @@ -0,0 +1,86 @@ +----------------------------------------------------------- +SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 +----------------------------------------------------------- + +PREAMBLE +The goals of the Open Font License (OFL) are to stimulate worldwide +development of collaborative font projects, to support the font creation +efforts of academic and linguistic communities, and to provide a free and +open framework in which fonts may be shared and improved in partnership +with others. + +The OFL allows the licensed fonts to be used, studied, modified and +redistributed freely as long as they are not sold by themselves. 
The +fonts, including any derivative works, can be bundled, embedded, +redistributed and/or sold with any software provided that any reserved +names are not used by derivative works. The fonts and derivatives, +however, cannot be released under any other type of license. The +requirement for fonts to remain under this license does not apply +to any document created using the fonts or their derivatives. + +DEFINITIONS +"Font Software" refers to the set of files released by the Copyright +Holder(s) under this license and clearly marked as such. This may +include source files, build scripts and documentation. + +"Reserved Font Name" refers to any names specified as such after the +copyright statement(s). + +"Original Version" refers to the collection of Font Software components as +distributed by the Copyright Holder(s). + +"Modified Version" refers to any derivative made by adding to, deleting, +or substituting -- in part or in whole -- any of the components of the +Original Version, by changing formats or by porting the Font Software to a +new environment. + +"Author" refers to any designer, engineer, programmer, technical +writer or other person who contributed to the Font Software. + +PERMISSION & CONDITIONS +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Font Software, to use, study, copy, merge, embed, modify, +redistribute, and sell modified and unmodified copies of the Font +Software, subject to the following conditions: + +1) Neither the Font Software nor any of its individual components, +in Original or Modified Versions, may be sold by itself. + +2) Original or Modified Versions of the Font Software may be bundled, +redistributed and/or sold with any software, provided that each copy +contains the above copyright notice and this license. 
These can be +included either as stand-alone text files, human-readable headers or +in the appropriate machine-readable metadata fields within text or +binary files as long as those fields can be easily viewed by the user. + +3) No Modified Version of the Font Software may use the Reserved Font +Name(s) unless explicit written permission is granted by the corresponding +Copyright Holder. This restriction only applies to the primary font name as +presented to the users. + +4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font +Software shall not be used to promote, endorse or advertise any +Modified Version, except to acknowledge the contribution(s) of the +Copyright Holder(s) and the Author(s) or with their explicit written +permission. + +5) The Font Software, modified or unmodified, in part or in whole, +must be distributed entirely under this license, and must not be +distributed under any other license. The requirement for fonts to +remain under this license does not apply to any document created +using the Font Software. + +TERMINATION +This license becomes null and void if any of the above conditions are +not met. + +DISCLAIMER +THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE +COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL +DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM +OTHER DEALINGS IN THE FONT SOFTWARE. 
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/xgboost/lib/xgboost4j-0.60-0.10.jar ---------------------------------------------------------------------- diff --git a/xgboost/lib/xgboost4j-0.60-0.10.jar b/xgboost/lib/xgboost4j-0.60-0.10.jar deleted file mode 100644 index cf1599b..0000000 Binary files a/xgboost/lib/xgboost4j-0.60-0.10.jar and /dev/null differ http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/xgboost/pom.xml ---------------------------------------------------------------------- diff --git a/xgboost/pom.xml b/xgboost/pom.xml index b9f11b8..8dcb45e 100644 --- a/xgboost/pom.xml +++ b/xgboost/pom.xml @@ -16,14 +16,13 @@ specific language governing permissions and limitations under the License. --> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" - xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> <groupId>org.apache.hivemall</groupId> <artifactId>hivemall</artifactId> - <version>0.5.0-incubating-SNAPSHOT</version> + <version>0.5.1-incubating-SNAPSHOT</version> <relativePath>../pom.xml</relativePath> </parent> @@ -32,8 +31,6 @@ <packaging>jar</packaging> <properties> - <xgboost.version>0.60</xgboost.version> - <xgboost4j.version>0.10</xgboost4j.version> <main.basedir>${project.parent.basedir}</main.basedir> </properties> @@ -42,69 +39,45 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> - <version>${hadoop.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-mapreduce-client-core</artifactId> - <version>${hadoop.version}</version> 
<scope>provided</scope> </dependency> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-exec</artifactId> - <version>${hive.version}</version> <scope>provided</scope> - <exclusions> - <exclusion> - <artifactId>jetty</artifactId> - <groupId>org.mortbay.jetty</groupId> - </exclusion> - <exclusion> - <groupId>javax.jdo</groupId> - <artifactId>jdo2-api</artifactId> - </exclusion> - <exclusion> - <groupId>asm-parent</groupId> - <artifactId>asm-parent</artifactId> - </exclusion> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - </exclusions> </dependency> <dependency> <groupId>commons-cli</groupId> <artifactId>commons-cli</artifactId> - <version>1.2</version> <scope>provided</scope> </dependency> <dependency> <groupId>commons-logging</groupId> <artifactId>commons-logging</artifactId> - <version>1.0.4</version> <scope>provided</scope> </dependency> <dependency> <groupId>log4j</groupId> <artifactId>log4j</artifactId> - <version>1.2.17</version> <scope>provided</scope> </dependency> + + <!-- compile scope --> <dependency> <groupId>org.apache.hivemall</groupId> <artifactId>hivemall-core</artifactId> <version>${project.version}</version> - <scope>provided</scope> + <scope>compile</scope> </dependency> - - <!-- compile scope --> <dependency> - <groupId>ml.dmlc</groupId> + <groupId>io.github.myui</groupId> <artifactId>xgboost4j</artifactId> - <version>${xgboost4j.version}</version> + <version>${xgboost.version}</version> <scope>compile</scope> </dependency> <dependency> @@ -116,106 +89,4 @@ </dependency> </dependencies> - <build> - <directory>target</directory> - <outputDirectory>target/classes</outputDirectory> - <finalName>${project.artifactId}-${project.version}</finalName> - <testOutputDirectory>target/test-classes</testOutputDirectory> - <plugins> - <!-- TODO: This is hacky, so we'll replace this with another better way in a future --> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - 
<artifactId>maven-install-plugin</artifactId> - <version>2.4</version> - <executions> - <execution> - <id>install-xgboost</id> - <phase>validate</phase> - <configuration> - <file>${basedir}/lib/xgboost4j-${xgboost.version}-${xgboost4j.version}.jar</file> - <repositoryLayout>default</repositoryLayout> - <groupId>ml.dmlc</groupId> - <artifactId>xgboost4j</artifactId> - <version>${xgboost4j.version}</version> - <packaging>jar</packaging> - <generatePom>true</generatePom> - </configuration> - <goals> - <goal>install-file</goal> - </goals> - </execution> - </executions> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-shade-plugin</artifactId> - <version>3.1.0</version> - <executions> - <!-- hivemall-xgboost_xx-xx.jar --> - <execution> - <id>jar-with-portal-binaries</id> - <phase>package</phase> - <goals> - <goal>shade</goal> - </goals> - <configuration> - <finalName>${project.artifactId}-${xgboost.version}-${project.version}</finalName> - <outputDirectory>${project.parent.build.directory}</outputDirectory> - <minimizeJar>false</minimizeJar> - <createDependencyReducedPom>false</createDependencyReducedPom> - <artifactSet> - <includes> - <include>ml.dmlc:xgboost4j</include> - </includes> - </artifactSet> - <filters> - <filter> - <artifact>*:*</artifact> - <excludes> - <exclude>tracker.py</exclude> - </excludes> - </filter> - </filters> - </configuration> - </execution> - <!-- hivemall-xgboost_xx-xx-with-dependencies.jar including minimum dependencies --> - <execution> - <id>jar-with-dependencies</id> - <phase>package</phase> - <goals> - <goal>shade</goal> - </goals> - <configuration> - <finalName>${project.artifactId}-${xgboost.version}-${project.version}-with-dependencies</finalName> - <outputDirectory>${project.parent.build.directory}</outputDirectory> - <minimizeJar>true</minimizeJar> - <createDependencyReducedPom>false</createDependencyReducedPom> - <artifactSet> - <includes> - 
<include>org.apache.hivemall:hivemall-core</include> - <include>io.netty:netty-all</include> - <include>com.github.haifengl:smile-core</include> - <include>com.github.haifengl:smile-math</include> - <include>com.github.haifengl:smile-data</include> - <include>org.tukaani:xz</include> - <include>ml.dmlc:xgboost4j</include> - <include>com.esotericsoftware.kryo:kryo</include> - </includes> - </artifactSet> - <filters> - <filter> - <artifact>*:*</artifact> - <excludes> - <exclude>*.jar</exclude> - <exclude>tracker.py</exclude> - </excludes> - </filter> - </filters> - </configuration> - </execution> - </executions> - </plugin> - </plugins> - </build> - </project> http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java ---------------------------------------------------------------------- diff --git a/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java b/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java index 2e2bf25..0472229 100644 --- a/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java +++ b/xgboost/src/main/java/hivemall/xgboost/XGBoostUtils.java @@ -48,7 +48,9 @@ public final class XGBoostUtils { values[i] = Float.parseFloat(str.substring(pos + 1)); } } - return LabeledPoint.fromSparseVector((float) target, indices, values); + + + return new LabeledPoint((float) target, indices, values); } } http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/3a718713/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java ---------------------------------------------------------------------- diff --git a/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java b/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java index fd67c09..b80f95a 100644 --- a/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java +++ b/xgboost/src/main/java/hivemall/xgboost/tools/XGBoostMulticlassPredictUDTF.java 
@@ -18,8 +18,6 @@ */ package hivemall.xgboost.tools; -import hivemall.utils.lang.Preconditions; - import java.util.ArrayList; import java.util.List; @@ -32,10 +30,11 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -@Description( - name = "xgboost_multiclass_predict", +import hivemall.utils.lang.Preconditions; + +@Description(name = "xgboost_multiclass_predict", value = "_FUNC_(string rowid, string[] features, string model_id, array<byte> pred_model [, string options]) " - + "- Returns a prediction result as (string rowid, int label, float probability)") + + "- Returns a prediction result as (string rowid, string label, float probability)") public final class XGBoostMulticlassPredictUDTF extends hivemall.xgboost.XGBoostPredictUDTF { public XGBoostMulticlassPredictUDTF() { @@ -65,14 +64,14 @@ public final class XGBoostMulticlassPredictUDTF extends hivemall.xgboost.XGBoost final Object[] forwardObj = new Object[3]; for (int i = 0, size = testData.size(); i < size; i++) { final float[] predicted_i = predicted[i]; - final String rowId = testData.get(i).getRowId(); + String rowId = testData.get(i).getRowId(); forwardObj[0] = rowId; assert (predicted_i.length > 1); for (int j = 0; j < predicted_i.length; j++) { - forwardObj[1] = j; + forwardObj[1] = String.valueOf(j); float prob = predicted_i[j]; - forwardObj[2] = prob; + forwardObj[2] = Float.valueOf(prob); forward(forwardObj); } }
