This is an automated email from the ASF dual-hosted git repository. myui pushed a commit to branch libsvm in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git
commit 8dd14c1e8c818b6a84ddeb8c5dcc3bed7d708de1 Author: Makoto Yui <[email protected]> AuthorDate: Thu Jun 20 19:30:06 2019 +0900 Updated function usage page --- .../hivemall/ftvec/conv/ToLibSVMFormatUDF.java | 15 ++++++++-- docs/gitbook/misc/funcs.md | 33 ++++++++++++++++++---- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/hivemall/ftvec/conv/ToLibSVMFormatUDF.java b/core/src/main/java/hivemall/ftvec/conv/ToLibSVMFormatUDF.java index a1c85d9..723cb0b 100644 --- a/core/src/main/java/hivemall/ftvec/conv/ToLibSVMFormatUDF.java +++ b/core/src/main/java/hivemall/ftvec/conv/ToLibSVMFormatUDF.java @@ -46,9 +46,18 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +// @formatter:off @Description(name = "to_libsvm_format", value = "_FUNC_(array<string> feautres [, double/integer target, const string options])" - + " - Returns a string representation of libsvm") + + " - Returns a string representation of libsvm", + extended = "Usage:\n" + + " select to_libsvm_format(array('apple:3.4','orange:2.1'))\n" + + " > 6284535:3.4 8104713:2.1\n" + + " select to_libsvm_format(array('apple:3.4','orange:2.1'), '-features 10')\n" + + " > 3:2.1 7:3.4\n" + + " select to_libsvm_format(array('7:3.4','3:2.1'), 5.0)\n" + + " > 5.0 3:2.1 7:3.4") +// @formatter:on @UDFType(deterministic = true, stateful = false) public final class ToLibSVMFormatUDF extends UDFWithOptions { @@ -69,7 +78,9 @@ public final class ToLibSVMFormatUDF extends UDFWithOptions { @Override protected CommandLine processOptions(@Nonnull String optionValue) throws UDFArgumentException { CommandLine cl = parseOptions(optionValue); - this._numFeatures = Primitives.parseInt(cl.getOptionValue("num_features"), _numFeatures); + this._numFeatures = Primitives.parseInt(cl.getOptionValue("num_features"), + MurmurHash3.DEFAULT_NUM_FEATURES); + assumeTrue(_numFeatures > 0, "num_features must be greater than 0: " + _numFeatures); return cl; } diff --git a/docs/gitbook/misc/funcs.md b/docs/gitbook/misc/funcs.md index ade9ee3..1b1b280 100644 --- a/docs/gitbook/misc/funcs.md +++ b/docs/gitbook/misc/funcs.md @@ -65,13 +65,25 @@ Reference: <a href="https://papers.nips.cc/paper/3848-adaptive-regularization-of GROUP BY feature ``` -- `train_pa1_regr(array<int|bigint|string> features, float target [, constant string options])` - PA-1 regressor that returns a relation consists of `<int|bigint|string> feature, float weight`. Find PA-1 algorithm detail in http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf - -- `train_pa1a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of `<int|bigint|string> feature, float weight`. +- `train_pa1_regr(array<int|bigint|string> features, float target [, constant string options])` - PA-1 regressor that returns a relation consists of `(int|bigint|string) feature, float weight`. + ```sql + SELECT + feature, + avg(weight) as weight + FROM + (SELECT + train_pa1_regr(features,label) as (feature,weight) + FROM + training_data + ) t + GROUP BY feature + ``` +Reference: <a href="http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf" target="_blank">Koby Crammer et.al., Online Passive-Aggressive Algorithms. Journal of Machine Learning Research, 2006.</a><br/> +- `train_pa1a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of `(int|bigint|string) feature, float weight`. -- `train_pa2_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of `<int|bigint|string> feature, float weight`. +- `train_pa2_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of `(int|bigint|string) feature, float weight`. -- `train_pa2a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of `<int|bigint|string> feature, float weight`. +- `train_pa2a_regr(array<int|bigint|string> features, float target [, constant string options])` - Returns a relation consists of `(int|bigint|string) feature, float weight`. - `train_regressor(list<string|int|bigint> features, double label [, const string options])` - Returns a relation consists of <string|int|bigint feature, float weight> ``` @@ -261,6 +273,17 @@ Reference: <a href="https://papers.nips.cc/paper/3848-adaptive-regularization-of - `to_dense_features(array<string> feature_vector, int dimensions)` - Returns a dense feature in array<float> +- `to_libsvm_format(array<string> feautres [, double/integer target, const string options])` - Returns a string representation of libsvm + ```sql + Usage: + select to_libsvm_format(array('apple:3.4','orange:2.1')) + > 6284535:3.4 8104713:2.1 + select to_libsvm_format(array('apple:3.4','orange:2.1'), '-features 10') + > 3:2.1 7:3.4 + select to_libsvm_format(array('7:3.4','3:2.1'), 5.0) + > 5.0 3:2.1 7:3.4 + ``` + - `to_sparse_features(array<float> feature_vector)` - Returns a sparse feature in array<string> ## Feature hashing
