Documentation: Remove online examples from sql functions.
JIRA: MADLIB-1260
For a madlib module, we can call
`select madlib_schema.module_name('example');` to print out examples of this
module.
They are hard to maintain and not that useful since we already have examples in
our user documentation: http://madlib.apache.org/docs/latest/index.html
We are going to remove those examples for every module that has it, and make
sure madlib throws a proper error message when a user calls it.
Closes #302
Co-authored-by: Orhan Kislal <[email protected]>
Co-authored-by: Nandish Jayaram <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/24a11c1e
Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/24a11c1e
Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/24a11c1e
Branch: refs/heads/master
Commit: 24a11c1e5863ab5fe0d7257904d70908c6e0c6b9
Parents: cf5ace9
Author: Orhan Kislal <[email protected]>
Authored: Wed Aug 15 12:27:50 2018 -0700
Committer: Orhan Kislal <[email protected]>
Committed: Wed Aug 15 12:32:34 2018 -0700
----------------------------------------------------------------------
.../modules/assoc_rules/assoc_rules.py_in | 63 +-
src/ports/postgres/modules/convex/mlp_igd.py_in | 375 +----------
.../modules/elastic_net/elastic_net.py_in | 125 ----
src/ports/postgres/modules/glm/glm.py_in | 181 ------
src/ports/postgres/modules/glm/multinom.py_in | 97 ---
src/ports/postgres/modules/glm/ordinal.py_in | 97 ---
src/ports/postgres/modules/graph/apsp.py_in | 72 ---
src/ports/postgres/modules/graph/bfs.py_in | 58 --
src/ports/postgres/modules/graph/hits.py_in | 86 +--
src/ports/postgres/modules/graph/measures.py_in | 120 +---
src/ports/postgres/modules/graph/pagerank.py_in | 109 +---
src/ports/postgres/modules/graph/sssp.py_in | 73 ---
src/ports/postgres/modules/graph/wcc.py_in | 125 +---
src/ports/postgres/modules/knn/knn.py_in | 134 +---
.../modules/linalg/matrix_help_message.py_in | 624 +++----------------
src/ports/postgres/modules/linalg/svd.py_in | 46 --
.../linear_systems/dense_linear_systems.py_in | 15 -
.../linear_systems/sparse_linear_systems.py_in | 19 -
src/ports/postgres/modules/pca/pca.py_in | 114 +---
.../postgres/modules/pca/pca_project.py_in | 161 +----
.../postgres/modules/pmml/table_to_pmml.py_in | 38 --
.../recursive_partitioning/decision_tree.py_in | 65 --
.../recursive_partitioning/random_forest.py_in | 66 --
src/ports/postgres/modules/regress/linear.py_in | 43 --
.../postgres/modules/regress/logistic.py_in | 48 --
.../modules/regress/multilogistic.py_in | 86 ---
.../modules/sample/balance_sample.py_in | 55 --
.../modules/sample/stratified_sample.py_in | 59 +-
.../modules/sample/stratified_sample.sql_in | 6 +-
.../modules/sample/train_test_split.py_in | 73 +--
.../postgres/modules/stats/correlation.py_in | 57 --
.../modules/stats/cox_prop_hazards.py_in | 51 --
.../postgres/modules/summary/summary.py_in | 40 --
src/ports/postgres/modules/svm/svm.py_in | 155 -----
.../utilities/minibatch_preprocessing.py_in | 50 --
src/ports/postgres/modules/utilities/path.py_in | 69 --
.../postgres/modules/utilities/sessionize.py_in | 84 +--
37 files changed, 119 insertions(+), 3620 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/assoc_rules/assoc_rules.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/assoc_rules/assoc_rules.py_in
b/src/ports/postgres/modules/assoc_rules/assoc_rules.py_in
index abc8b50..243851d 100644
--- a/src/ports/postgres/modules/assoc_rules/assoc_rules.py_in
+++ b/src/ports/postgres/modules/assoc_rules/assoc_rules.py_in
@@ -564,65 +564,6 @@ in each row, with the following columns:
independent, to the observed support of X occuring
without Y
""".format(schema_madlib=schema_madlib)
else:
- if message.lower() in ("example", "examples"):
- return """
-------------------------------------------------------------------------
- EXAMPLES
-------------------------------------------------------------------------
-DROP TABLE IF EXISTS test_data;
-CREATE TABLE test_data (
- trans_id INT,
- product TEXT
-);
-INSERT INTO test_data VALUES (1, 'beer');
-INSERT INTO test_data VALUES (1, 'diapers');
-INSERT INTO test_data VALUES (1, 'chips');
-INSERT INTO test_data VALUES (2, 'beer');
-INSERT INTO test_data VALUES (2, 'diapers');
-INSERT INTO test_data VALUES (3, 'beer');
-INSERT INTO test_data VALUES (3, 'diapers');
-INSERT INTO test_data VALUES (4, 'beer');
-INSERT INTO test_data VALUES (4, 'chips');
-INSERT INTO test_data VALUES (5, 'beer');
-INSERT INTO test_data VALUES (6, 'beer');
-INSERT INTO test_data VALUES (6, 'diapers');
-INSERT INTO test_data VALUES (6, 'chips');
-INSERT INTO test_data VALUES (7, 'beer');
-INSERT INTO test_data VALUES (7, 'diapers');
-
-Find all association rules with a support and threshold value of
-at least 0.25 and 0.5 respectively.
-
-SELECT * FROM {schema_madlib}.assoc_rules( .25,
- .5,
- 'trans_id',
- 'product',
- 'test_data',
- NULL,
- TRUE
- );
-
-View output results:
-SELECT * FROM assoc_rules;
-
-Find association rules generated from itemsets of size at most 2,
-and a support and threshold value of at least 0.25 and 0.5 respectively.
-
-SELECT * FROM {schema_madlib}.assoc_rules( .25,
- .5,
- 'trans_id',
- 'product',
- 'test_data',
- NULL,
- TRUE,
- 2
- );
-
-View output results:
-SELECT * FROM assoc_rules;
- """.format(schema_madlib=schema_madlib)
- else:
- return """
+ return """
For an overview on usage, run: SELECT {schema_madlib}.assoc_rules('usage');
-For an example of using assoc_rules, run: SELECT
{schema_madlib}.assoc_rules('example');
- """.format(schema_madlib=schema_madlib)
+ """.format(schema_madlib=schema_madlib)
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/convex/mlp_igd.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/convex/mlp_igd.py_in
b/src/ports/postgres/modules/convex/mlp_igd.py_in
index 7df44ec..0126d31 100644
--- a/src/ports/postgres/modules/convex/mlp_igd.py_in
+++ b/src/ports/postgres/modules/convex/mlp_igd.py_in
@@ -1134,10 +1134,7 @@ def mlp_help(schema_madlib, message, is_classification):
functions.
For more details on function usage:
- SELECT {schema_madlib}.{method}('usage')
-
- For a small example on using the function:
- SELECT {schema_madlib}.{method}('example')""".format(**args)
+ SELECT {schema_madlib}.{method}('usage')""".format(**args)
usage = """
---------------------------------------------------------------------------
@@ -1239,304 +1236,6 @@ def mlp_help(schema_madlib, message, is_classification):
""".format(**args)
- regression_example = """
- -- Create input table
-
- CREATE TABLE lin_housing (id serial, x float8[], zipcode int, y float8);
- COPY lin_housing (x, zipcode, y) FROM STDIN NULL '?' DELIMITER '|';
-
{{1,0.00632,18.00,2.310,0,0.5380,6.5750,65.20,4.0900,1,296.0,15.30,396.90,4.98}}|94016|24.00
-
{{1,0.02731,0.00,7.070,0,0.4690,6.4210,78.90,4.9671,2,242.0,17.80,396.90,9.14}}|94016|21.60
-
{{1,0.02729,0.00,7.070,0,0.4690,7.1850,61.10,4.9671,2,242.0,17.80,392.83,4.03}}|94016|34.70
-
{{1,0.03237,0.00,2.180,0,0.4580,6.9980,45.80,6.0622,3,222.0,18.70,394.63,2.94}}|94016|33.40
-
{{1,0.06905,0.00,2.180,0,0.4580,7.1470,54.20,6.0622,3,222.0,18.70,396.90,5.33}}|94016|36.20
-
{{1,0.02985,0.00,2.180,0,0.4580,6.4300,58.70,6.0622,3,222.0,18.70,394.12,5.21}}|94016|28.70
-
{{1,0.08829,12.50,7.870,0,0.5240,6.0120,66.60,5.5605,5,311.0,15.20,395.60,12.43}}|94016|22.90
-
{{1,0.14455,12.50,7.870,0,0.5240,6.1720,96.10,5.9505,5,311.0,15.20,396.90,19.15}}|94016|27.10
-
{{1,0.21124,12.50,7.870,0,0.5240,5.6310,100.00,6.0821,5,311.0,15.20,386.63,29.93}}|94016|16.50
-
{{1,0.17004,12.50,7.870,0,0.5240,6.0040,85.90,6.5921,5,311.0,15.20,386.71,17.10}}|94016|18.90
-
{{1,0.22489,12.50,7.870,0,0.5240,6.3770,94.30,6.3467,5,311.0,15.20,392.52,20.45}}|94016|15.00
-
{{1,0.11747,12.50,7.870,0,0.5240,6.0090,82.90,6.2267,5,311.0,15.20,396.90,13.27}}|20001|18.90
-
{{1,0.09378,12.50,7.870,0,0.5240,5.8890,39.00,5.4509,5,311.0,15.20,390.50,15.71}}|20001|21.70
-
{{1,0.62976,0.00,8.140,0,0.5380,5.9490,61.80,4.7075,4,307.0,21.00,396.90,8.26}}|20001|20.40
-
{{1,0.63796,0.00,8.140,0,0.5380,6.0960,84.50,4.4619,4,307.0,21.00,380.02,10.26}}|20001|18.20
-
{{1,0.62739,0.00,8.140,0,0.5380,5.8340,56.50,4.4986,4,307.0,21.00,395.62,8.47}}|20001|19.90
-
{{1,1.05393,0.00,8.140,0,0.5380,5.9350,29.30,4.4986,4,307.0,21.00,386.85,6.58}}|20001|
23.10
-
{{1,0.78420,0.00,8.140,0,0.5380,5.9900,81.70,4.2579,4,307.0,21.00,386.75,14.67}}|20001|17.50
-
{{1,0.80271,0.00,8.140,0,0.5380,5.4560,36.60,3.7965,4,307.0,21.00,288.99,11.69}}|20001|20.20
-
{{1,0.72580,0.00,8.140,0,0.5380,5.7270,69.50,3.7965,4,307.0,21.00,390.95,11.28}}|20001|18.20
- \.
-
- -- Generate a multilayer perception with a two hidden layers of 25 units
- -- each. Use the x column as the independent variables, and use the class
- -- column as the classification. Set the tolerance to 0 so that 500
- -- iterations will be run. Use a sigmoid activation function.
- -- The model will be written to mlp_regress_result.
-
- DROP TABLE IF EXISTS mlp_regress, mlp_regress_summary,
mlp_regress_standardization;
- SELECT {schema_madlib}.{method}(
- 'lin_housing', -- Source table
- 'mlp_regress', -- Desination table
- 'x', -- Input features
- 'y', -- Dependent variable
- ARRAY[25,25], -- Number of units per layer
- 'learning_rate_init=0.001,
- n_iterations=500,
- lambda=0.001,
- tolerance=0', -- Optimizer params
- 'relu', -- Activation function
- NULL, -- Default weight (1)
- FALSE, -- No warm start
- FALSE -- Not verbose
- );
- SELECT * FROM mlp_regress;
-
- -- Use the n_tries optimizer param to learn the best of multiple models:
- DROP TABLE IF EXISTS mlp_regress, mlp_regress_summary,
mlp_regress_standardization;
- SELECT {schema_madlib}.{method}(
- 'lin_housing', -- Source table
- 'mlp_regress', -- Desination table
- 'x', -- Input features
- 'y', -- Dependent variable
- ARRAY[25,25], -- Number of units per layer
- 'learning_rate_init=0.001,
- n_iterations=50,
- n_tries=3,
- lambda=0.001,
- tolerance=0', -- Optimizer params, with n_tries
- 'relu', -- Activation function
- NULL, -- Default weight (1)
- FALSE, -- No warm start
- FALSE -- Not verbose
- );
- SELECT * FROM mlp_regress;
-
- -- Use the warm start param to improve the model present in mlp_regress.
- -- Note that mlp_regress should not be dropped.
- SELECT {schema_madlib}.{method}(
- 'lin_housing', -- Source table
- 'mlp_regress', -- Desination table
- 'x', -- Input features
- 'y', -- Dependent variable
- ARRAY[25,25], -- Number of units per layer
- 'learning_rate_init=0.001,
- n_iterations=50,
- n_tries=3
- lambda=0.001,
- tolerance=0',
- 'relu', -- Activation function
- NULL, -- Default weight (1)
- TRUE, -- Warm start
- FALSE -- Verbose
- );
- SELECT * FROM mlp_regress;
-
- -- Use the grouping feature to learn a different model for each zipcode:
- DROP TABLE IF EXISTS mlp_regress_group, mlp_regress_group_summary;
- DROP TABLE IF EXISTS mlp_regress_group_standardization;
- SELECT {schema_madlib}.{method}(
- 'lin_housing', -- Source table
- 'mlp_regress_group', -- Desination table
- 'x', -- Input features
- 'y', -- Dependent variable
- ARRAY[25,25], -- Number of units per layer
- 'learning_rate_init=0.001,
- n_iterations=50,
- lambda=0.001,
- tolerance=0', -- Optimizer params, with n_tries
- 'relu', -- Activation function
- NULL, -- Default weight (1)
- FALSE, -- No warm start
- FALSE, -- Not verbose
- 'zipcode' -- Grouping column
- );
- SELECT * FROM mlp_regress_group;
-
- -- n_tries and warm_start can be used with grouping too, similar to as
- -- shown above without grouping.
-
- -- Pre-process source table so that the solver uses mini-batch gradient
descent.
- DROP TABLE IF EXISTS lin_housing_batch, lin_housing_batch_summary;
- DROP TABLE IF EXISTS lin_housing_batch_standardization;
- SELECT {schema_madlib}.minibatch_preprocessor(
- 'lin_housing', -- Source table
- 'lin_housing_batch', -- Destination table of preprocessor
- 'y', -- Dependent variable
- 'x', -- Independent variable
- 10 -- Buffer size (optional)
- );
-
- -- Train MLP with lin_housing_batch, the solver automatically uses
mini-batch
- -- gradient descent.
- DROP TABLE IF EXISTS mlp_regress_group, mlp_regress_group_summary;
- DROP TABLE IF EXISTS mlp_regress_group_standardization;
- SELECT {schema_madlib}.{method}(
- 'lin_housing_batch', -- Source table
- 'mlp_regress_batch', -- Desination table
- 'independent_varname', -- Input features
- 'dependent_varname', -- Dependent variable
- ARRAY[25,25], -- Number of units per layer
- 'learning_rate_init=0.001,
- n_iterations=50,
- lambda=0.001,
- tolerance=0',
- n_epochs=20, -- Optimizer params, with n_tries
- 'relu', -- Activation function
- NULL, -- Default weight (1)
- FALSE, -- No warm start
- FALSE -- Not verbose
- );
- SELECT * FROM mlp_regress_batch;
- """
-
- classification_example = """
- -- Create input table
-
- CREATE TABLE iris_data(
- id INTEGER,
- attributes NUMERIC[],
- class_text VARCHAR,
- class INTEGER,
- state VARCHAR
- );
-
- COPY iris_data (attributes, class_text, class, state) FROM STDIN NULL '?'
DELIMITER '|';
- {{4.4,3.2,1.3,0.2}}|Iris_setosa|1|Alaska
- {{5.0,3.5,1.6,0.6}}|Iris_setosa|1|Alaska
- {{5.1,3.8,1.9,0.4}}|Iris_setosa|1|Alaska
- {{4.8,3.0,1.4,0.3}}|Iris_setosa|1|Alaska
- {{5.1,3.8,1.6,0.2}}|Iris_setosa|1|Alaska
- {{5.7,2.8,4.5,1.3}}|Iris_versicolor|2|Alaska
- {{6.3,3.3,4.7,1.6}}|Iris_versicolor|2|Alaska
- {{4.9,2.4,3.3,1.0}}|Iris_versicolor|2|Alaska
- {{6.6,2.9,4.6,1.3}}|Iris_versicolor|2|Alaska
- {{5.2,2.7,3.9,1.4}}|Iris_versicolor|2|Alaska
- {{5.0,2.0,3.5,1.0}}|Iris_versicolor|2|Alaska
- {{4.8,3.0,1.4,0.1}}|Iris_setosa|1|Tennessee
- {{4.3,3.0,1.1,0.1}}|Iris_setosa|1|Tennessee
- {{5.8,4.0,1.2,0.2}}|Iris_setosa|1|Tennessee
- {{5.7,4.4,1.5,0.4}}|Iris_setosa|1|Tennessee
- {{5.4,3.9,1.3,0.4}}|Iris_setosa|1|Tennessee
- {{6.0,2.9,4.5,1.5}}|Iris_versicolor|2|Tennessee
- {{5.7,2.6,3.5,1.0}}|Iris_versicolor|2|Tennessee
- {{5.5,2.4,3.8,1.1}}|Iris_versicolor|2|Tennessee
- {{5.5,2.4,3.7,1.0}}|Iris_versicolor|2|Tennessee
- {{5.8,2.7,3.9,1.2}}|Iris_versicolor|2|Tennessee
- {{6.0,2.7,5.1,1.6}}|Iris_versicolor|2|Tennessee
- \.
-
-
- -- Generate a multilayer perception with a single hidden layer of 5 units.
- -- Use the attributes column as the independent variables, and use the
class
- -- column as the classification. Set the tolerance to 0 so that 500
- -- iterations will be run. Use a hyperbolic tangent activation function.
- -- The model will be written to mlp_model.
-
- DROP TABLE IF EXISTS mlp_model, mlp_model_summary,
mlp_model_standardization;
- SELECT madlib.mlp_classification(
- 'iris_data', -- Source table
- 'mlp_model', -- Destination table
- 'attributes', -- Input features
- 'class_text', -- Label
- ARRAY[5], -- Number of units per layer
- 'learning_rate_init=0.003,
- n_iterations=500,
- tolerance=0', -- Optimizer params
- 'tanh', -- Activation function
- NULL, -- Default weight (1)
- FALSE, -- No warm start
- FALSE -- Not verbose
- );
-
- SELECT * FROM mlp_model;
-
- -- Use the n_tries optimizer param to learn the best of multiple models:
- DROP TABLE IF EXISTS mlp_model, mlp_model_summary,
mlp_model_standardization;
- SELECT madlib.mlp_classification(
- 'iris_data', -- Source table
- 'mlp_model', -- Destination table
- 'attributes', -- Input features
- 'class_text', -- Label
- ARRAY[5], -- Number of units per layer
- 'learning_rate_init=0.003,
- n_iterations=500,
- n_tries=3,
- tolerance=0', -- Optimizer params, with n_tries
- 'tanh', -- Activation function
- NULL, -- Default weight (1)
- FALSE, -- No warm start
- FALSE -- Not verbose
- );
-
- -- Use the warm start param to improve the model present in mlp_model.
- -- Note that mlp_model should not be dropped.
- SELECT madlib.mlp_classification(
- 'iris_data', -- Source table
- 'mlp_model', -- Destination table
- 'attributes', -- Input features
- 'class_text', -- Label
- ARRAY[5], -- Number of units per layer
- 'learning_rate_init=0.003,
- n_iterations=500,
- tolerance=0', -- Optimizer params
- 'tanh', -- Activation function
- NULL, -- Default weight (1)
- FALSE, -- Warm start
- FALSE -- Not verbose
- );
-
- -- Use the grouping feature to learn a different model for each state:
- DROP TABLE IF EXISTS mlp_model_group, mlp_model_group_summary;
- DROP TABLE IF EXISTS mlp_model_group_standardization;
- SELECT madlib.mlp_classification(
- 'iris_data', -- Source table
- 'mlp_model_group',-- Destination table
- 'attributes', -- Input features
- 'class_text', -- Label
- ARRAY[5], -- Number of units per layer
- 'learning_rate_init=0.003,
- n_iterations=500,
- tolerance=0', -- Optimizer params
- 'tanh', -- Activation function
- NULL, -- Default weight (1)
- FALSE, -- No warm start
- FALSE, -- Not verbose
- 'state' -- Grouping column
- );
-
- -- n_tries and warm_start can be used with grouping too, similar to as
- -- shown above without grouping.
-
- -- Pre-process source table so that the solver uses mini-batch gradient
descent.
- DROP TABLE IF EXISTS iris_data_batch, iris_data_batch_summary;
- DROP TABLE IF EXISTS iris_data_batch_standardization;
- SELECT {schema_madlib}.minibatch_preprocessor(
- 'iris_data', -- Source table
- 'iris_data_batch', -- Destination table of preprocessor
- 'y', -- Dependent variable
- 'x' -- Independent variable
- );
-
- -- Train MLP with lin_housing_batch, the solver automatically uses
mini-batch
- -- gradient descent.
- DROP TABLE IF EXISTS mlp_model_batch, mlp_model_batch_summary;
- DROP TABLE IF EXISTS mlp_model_batch_standardization;
- SELECT madlib.mlp_classification(
- 'iris_data_batch', -- Source table
- 'mlp_model_batch', -- Destination table
- 'attributes', -- Input features
- 'class_text', -- Label
- ARRAY[5], -- Number of units per layer
- 'learning_rate_init=0.003,
- n_iterations=500,
- tolerance=0', -- Optimizer params
- 'tanh', -- Activation function
- NULL, -- Default weight (1)
- FALSE, -- No warm start
- FALSE -- Not verbose
- );
-
- """.format(**args)
- example = classification_example if is_classification else
regression_example
optimizer_params = """
------------------------------------------------------------------------------------------------
OPTIMIZER PARAMS
@@ -1605,8 +1304,6 @@ def mlp_help(schema_madlib, message, is_classification):
return summary
elif message.lower() in ('usage', 'help', '?'):
return usage
- elif message.lower() == 'example':
- return example
elif message.lower() == 'optimizer_params':
return optimizer_params
return """
@@ -1629,10 +1326,7 @@ def mlp_predict_help(schema_madlib, message):
functions.
For more details on function usage:
- SELECT {schema_madlib}.mlp_predict('usage')
-
- For a small example on using the function:
- SELECT {schema_madlib}.mlp_predict('example')""".format(**args)
+ SELECT {schema_madlib}.mlp_predict('usage')""".format(**args)
usage = """
---------------------------------------------------------------------------
@@ -1669,75 +1363,10 @@ def mlp_predict_help(schema_madlib, message):
""".format(**args)
- example = """
- -- See {schema_madlib}.mlp_classification('example') for test
- -- and model tables
-
- -- Predict classes using
- SELECT {schema_madlib}.mlp_predict(
- 'mlp_model', -- Model table
- 'iris_data', -- Test data table
- 'id', -- Id column in test table
- 'mlp_prediction', -- Output table for predictions
- 'response' -- Output classes, not probabilities
- );
- SELECT * FROM mlp_prediction;
-
- WITH total_count AS (SELECT count(*) AS c FROM iris_data)
- SELECT count(*)/((SELECT c FROM total_count)::DOUBLE PRECISION)
- AS train_accuracy
- FROM
- (
- SELECT iris_data.class_text AS actual_label,
- mlp_prediction.estimated_class_text AS predicted_label
- FROM mlp_prediction
- INNER JOIN iris_data ON iris_data.id=mlp_prediction.id
- ) q
- WHERE q.actual_label=q.predicted_label;
-
- -- Predict using models specific to states:
- SELECT {schema_madlib}.mlp_predict(
- 'mlp_model_group', -- Grouping based model table
- 'iris_data', -- Test data table
- 'id', -- Id column in test table
- 'mlp_prediction', -- Output table for predictions
- 'response' -- Output classes, not probabilities
- );
- SELECT * FROM mlp_prediction;
-
- -- See {schema_madlib}.mlp_regression('example') for test
- -- and model tables.
-
- -- Predict using the regression model:
- DROP TABLE IF EXISTS mlp_regress_prediction;
- SELECT madlib.mlp_predict(
- 'mlp_regress', -- Model table
- 'lin_housing', -- Test data table
- 'id', -- Id column in test table
- 'mlp_regress_prediction', -- Output table for predictions
- 'response' -- Output values, not probabilities
- );
- SELECT * FROM mlp_regress_prediction;
-
- -- Predict using the zipcode specific regression models:
- DROP TABLE IF EXISTS mlp_regress_prediction;
- SELECT madlib.mlp_predict(
- 'mlp_regress_group', -- Grouping based model table
- 'lin_housing', -- Test data table
- 'id', -- Id column in test table
- 'mlp_regress_prediction', -- Output table for predictions
- 'response' -- Output values, not probabilities
- );
- SELECT * FROM mlp_regress_prediction;
-
- """.format(**args)
-
if not message:
return summary
elif message.lower() in ('usage', 'help', '?'):
return usage
- elif message.lower() == 'example':
- return example
return """
No such option. Use "SELECT {schema_madlib}.mlp_predict()" for help.
""".format(**args)
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/elastic_net/elastic_net.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/elastic_net/elastic_net.py_in
b/src/ports/postgres/modules/elastic_net/elastic_net.py_in
index 555840a..c6dec53 100644
--- a/src/ports/postgres/modules/elastic_net/elastic_net.py_in
+++ b/src/ports/postgres/modules/elastic_net/elastic_net.py_in
@@ -62,133 +62,8 @@ def elastic_net_help(schema_madlib,
family_or_optimizer=None, **kwargs):
--
Run: SELECT {schema_madlib}.elastic_net_train('predict');
to see how to predict.
- --
- Run: SELECT {schema_madlib}.elastic_net_train('example');
- to see some examples.
""".format(schema_madlib=schema_madlib)
- if (family_or_optimizer.lower() in ('example', 'examples')):
- return """
- ----------------------------------------------------------------
- EXAMPLE
- ----------------------------------------------------------------
- Create an input data set:
- DROP TABLE IF EXISTS houses;
- CREATE TABLE houses ( id INT,
- tax INT,
- bedroom INT,
- bath FLOAT,
- price INT,
- size INT,
- lot INT,
- zipcode INT);
- INSERT INTO houses VALUES
- (1, 590, 2, 1, 50000, 770, 22100, 94301),
- (2, 1050, 3, 2, 85000, 1410, 12000, 94301),
- (3, 20, 3, 1, 22500, 1060, 3500, 94301),
- (4, 870, 2, 2, 90000, 1300, 17500, 94301),
- (5, 1320, 3, 2, 133000, 1500, 30000, 94301),
- (6, 1350, 2, 1, 90500, 820, 25700, 94301),
- (7, 2790, 3, 2.5, 260000, 2130, 25000, 94301),
- (8, 680, 2, 1, 142500, 1170, 22000, 94301),
- (9, 1840, 3, 2, 160000, 1500, 19000, 94301),
- (10, 3680, 4, 2, 240000, 2790, 20000, 94301),
- (11, 1660, 3, 1, 87000, 1030, 17500, 94301),
- (12, 1620, 3, 2, 118600, 1250, 20000, 94301),
- (13, 3100, 3, 2, 140000, 1760, 38000, 94301),
- (14, 2070, 2, 3, 148000, 1550, 14000, 94301),
- (15, 650, 3, 1.5, 65000, 1450, 12000, 94301),
- (16, 770, 2, 2, 91000, 1300, 17500, 76010),
- (17, 1220, 3, 2, 132300, 1500, 30000, 76010),
- (18, 1150, 2, 1, 91100, 820, 25700, 76010),
- (19, 2690, 3, 2.5, 260011, 2130, 25000, 76010),
- (20, 780, 2, 1, 141800, 1170, 22000, 76010),
- (21, 1910, 3, 2, 160900, 1500, 19000, 76010),
- (22, 3600, 4, 2, 239000, 2790, 20000, 76010),
- (23, 1600, 3, 1, 81010, 1030, 17500, 76010),
- (24, 1590, 3, 2, 117910, 1250, 20000, 76010),
- (25, 3200, 3, 2, 141100, 1760, 38000, 76010),
- (26, 2270, 2, 3, 148011, 1550, 14000, 76010),
- (27, 750, 3, 1.5, 66000, 1450, 12000, 76010);
-
- Train a model:
- DROP TABLE IF EXISTS houses_en, houses_en_summary;
- SELECT {schema_madlib}.elastic_net_train(
- 'houses', -- source table
- 'houses_en', -- result table
- 'price', -- dependent variable
- 'array[tax, bath, size]', -- independent variable
- 'gaussian', -- regression family
- 0.5, -- alpha value
- 0.1, -- lambda value
- TRUE, -- standardize
- NULL, -- grouping column(s)
- 'fista', -- optimizer
- '', -- optimizer parameters
- NULL, -- excluded columns
- 10000, -- maximum iterations
- 1e-6 -- tolerance value
- );
-
- View the resulting model:
- \\x on
- SELECT * FROM houses_en;
- \\x off
-
- Use the prediction function to evaluate residuals:
- SELECT id, price, predict, price - predict AS residual
- FROM (
- SELECT
- houses.*,
- {schema_madlib}.elastic_net_gaussian_predict(
- m.coef_all,
- m.intercept,
- ARRAY[tax,bath,size]
- ) AS predict
- FROM houses, houses_en m) s
- ORDER BY id;
-
- Additional Example (with grouping):
- DROP TABLE IF EXISTS houses_en1, houses_en1_summary;
- SELECT {schema_madlib}.elastic_net_train( 'houses',
- 'houses_en1',
- 'price',
- 'array[tax, bath, size]',
- 'gaussian',
- 1,
- 30000,
- TRUE,
- 'zipcode',
- 'fista',
- '',
- NULL,
- 10000,
- 1e-6
- );
-
- View the resulting model and see a separate model for each group:
- \\x on
- SELECT * FROM houses_en1;
- \\x off
-
- Use the prediction function to evaluate residuals:
- SELECT {schema_madlib}.elastic_net_predict(
- 'houses_en1', -- model table
- 'houses', -- new source data table
- 'id', -- unique ID associated with each row
- 'houses_en1_prediction' -- table to store prediction result
- );
-
- View the results:
- SELECT houses.id,
- houses.price,
- houses_en1_prediction.prediction,
- houses.price - houses_en1_prediction.prediction AS residual
- FROM houses_en1_prediction, houses
- WHERE houses.id=houses_en1_prediction.id;
-
- """
-
if (family_or_optimizer.lower() in ('usage', 'help', '?')):
return """
----------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/glm/glm.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/glm/glm.py_in
b/src/ports/postgres/modules/glm/glm.py_in
index 49dd9c8..718837e 100644
--- a/src/ports/postgres/modules/glm/glm.py_in
+++ b/src/ports/postgres/modules/glm/glm.py_in
@@ -378,10 +378,6 @@ Function to fit a generalized linear model, relating
responses to linear combina
of predictor variables.
For details on function usage:
- SELECT {schema_madlib}.glm('usage')
-
-For a small example on using the function:
- SELECT {schema_madlib}.glm('example')
"""
elif message in ['usage', 'help', '?']:
@@ -449,88 +445,6 @@ A summary table named <out_table>_summary is also created
at the same time, whic
total_rows_processed bigint, -- total numbers of rows processed
total_rows_skipped bigint, -- total numbers of rows skipped
"""
- elif message in ['example', 'examples']:
-
- help_string = """
-CREATE TABLE warpbreaks(
- id serial,
- breaks integer,
- wool char(1),
- tension char(1)
-);
-INSERT INTO warpbreaks(breaks, wool, tension) VALUES
-(26, 'A', 'L'),
-(30, 'A', 'L'),
-(54, 'A', 'L'),
-(25, 'A', 'L'),
-(70, 'A', 'L'),
-(52, 'A', 'L'),
-(51, 'A', 'L'),
-(26, 'A', 'L'),
-(67, 'A', 'L'),
-(18, 'A', 'M'),
-(21, 'A', 'M'),
-(29, 'A', 'M'),
-(17, 'A', 'M'),
-(12, 'A', 'M'),
-(18, 'A', 'M'),
-(35, 'A', 'M'),
-(30, 'A', 'M'),
-(36, 'A', 'M'),
-(36, 'A', 'H'),
-(21, 'A', 'H'),
-(24, 'A', 'H'),
-(18, 'A', 'H'),
-(10, 'A', 'H'),
-(43, 'A', 'H'),
-(28, 'A', 'H'),
-(15, 'A', 'H'),
-(26, 'A', 'H'),
-(27, 'B', 'L'),
-(14, 'B', 'L'),
-(29, 'B', 'L'),
-(19, 'B', 'L'),
-(29, 'B', 'L'),
-(31, 'B', 'L'),
-(41, 'B', 'L'),
-(20, 'B', 'L'),
-(44, 'B', 'L'),
-(42, 'B', 'M'),
-(26, 'B', 'M'),
-(19, 'B', 'M'),
-(16, 'B', 'M'),
-(39, 'B', 'M'),
-(28, 'B', 'M'),
-(21, 'B', 'M'),
-(39, 'B', 'M'),
-(29, 'B', 'M'),
-(20, 'B', 'H'),
-(21, 'B', 'H'),
-(24, 'B', 'H'),
-(17, 'B', 'H'),
-(13, 'B', 'H'),
-(15, 'B', 'H'),
-(15, 'B', 'H'),
-(16, 'B', 'H'),
-(28, 'B', 'H');
-
-SELECT create_indicator_variables('warpbreaks', 'warpbreaks_dummy',
'wool,tension');
-
--- Drop output tables before calling the function
-DROP TABLE IF EXISTS glm_model;
-DROP TABLE IF EXISTS glm_model_summary;
-
-SELECT glm('warpbreaks_dummy',
- 'glm_model',
- 'breaks',
- 'ARRAY[1.0,"wool_B","tension_M", "tension_H"]',
- 'family=poisson, link=log',
- NULL,
- 'max_iter=100,optimizer=irls,tolerance=1e-6',
- true);
-
-SELECT * from glm_model;
- """
else:
help_string = "No such option. Use {schema_madlib}.glm('help')"
@@ -561,9 +475,6 @@ coefficients should match the number of variables in the
new predictors.
For details on function usage:
SELECT {schema_madlib}.glm_predict('usage')
-For a small example on using the function:
- SELECT {schema_madlib}.glm_predict('example')
-
For prediction functions related to specific distributions:
SELECT {schema_madlib}.glm_predict_poisson('help')
SELECT {schema_madlib}.glm_predict_binomial('help')
@@ -586,92 +497,6 @@ SELECT {schema_madlib}.glm_predict(
The output is a table with one column which gives the estimated conditional
means for the new predictors.
"""
- elif message in ['example', 'examples']:
-
- help_string = """
-DROP TABLE IF EXISTS warpbreaks, warpbreaks_dummy, glm_model,
glm_model_summary;
-CREATE TABLE warpbreaks(
- id serial,
- breaks integer,
- wool char(1),
- tension char(1)
-);
-INSERT INTO warpbreaks(breaks, wool, tension) VALUES
-(26, 'A', 'L'),
-(30, 'A', 'L'),
-(54, 'A', 'L'),
-(25, 'A', 'L'),
-(70, 'A', 'L'),
-(52, 'A', 'L'),
-(51, 'A', 'L'),
-(26, 'A', 'L'),
-(67, 'A', 'L'),
-(18, 'A', 'M'),
-(21, 'A', 'M'),
-(29, 'A', 'M'),
-(17, 'A', 'M'),
-(12, 'A', 'M'),
-(18, 'A', 'M'),
-(35, 'A', 'M'),
-(30, 'A', 'M'),
-(36, 'A', 'M'),
-(36, 'A', 'H'),
-(21, 'A', 'H'),
-(24, 'A', 'H'),
-(18, 'A', 'H'),
-(10, 'A', 'H'),
-(43, 'A', 'H'),
-(28, 'A', 'H'),
-(15, 'A', 'H'),
-(26, 'A', 'H'),
-(27, 'B', 'L'),
-(14, 'B', 'L'),
-(29, 'B', 'L'),
-(19, 'B', 'L'),
-(29, 'B', 'L'),
-(31, 'B', 'L'),
-(41, 'B', 'L'),
-(20, 'B', 'L'),
-(44, 'B', 'L'),
-(42, 'B', 'M'),
-(26, 'B', 'M'),
-(19, 'B', 'M'),
-(16, 'B', 'M'),
-(39, 'B', 'M'),
-(28, 'B', 'M'),
-(21, 'B', 'M'),
-(39, 'B', 'M'),
-(29, 'B', 'M'),
-(20, 'B', 'H'),
-(21, 'B', 'H'),
-(24, 'B', 'H'),
-(17, 'B', 'H'),
-(13, 'B', 'H'),
-(15, 'B', 'H'),
-(15, 'B', 'H'),
-(16, 'B', 'H'),
-(28, 'B', 'H');
-
-SELECT create_indicator_variables('warpbreaks', 'warpbreaks_dummy',
'wool,tension');
-
--- Drop output tables before calling the function
-DROP TABLE IF EXISTS glm_model;
-DROP TABLE IF EXISTS glm_model_summary;
-
-SELECT glm('warpbreaks_dummy',
- 'glm_model',
- 'breaks',
- 'ARRAY[1.0,"wool_B","tension_M", "tension_H"]',
- 'family=poisson, link=log',
- NULL,
- 'max_iter=100,optimizer=irls,tolerance=1e-6',
- true);
-
-SELECT * from glm_model;
-SELECT w.id, madlib.glm_predict(coef, ARRAY[1, "wool_B", "tension_M",
"tension_H"]::float8[],'log') as mu
-FROM warpbreaks_dummy w, glm_model m
-ORDER BY w.id;
- """
else:
help_string = "No such option. Use {schema_madlib}.glm_predict('help')"
@@ -709,9 +534,6 @@ mean for the new predictors, rounded to the nearest
integral value.
For more details on glm predict functions:
SELECT {schema_madlib}.glm_predict('usage')
-
-For examples:
- SELECT {schema_madlib}.glm_predict('example')
"""
else:
help_string = "No such option. Use
{schema_madlib}.glm_predict_poisson('help')"
@@ -747,9 +569,6 @@ of the dependent variable as a boolean value.
For more details on glm predict functions:
SELECT {schema_madlib}.glm_predict('usage')
-
-For examples:
- SELECT {schema_madlib}.glm_predict('example')
"""
else:
help_string = "No such option. Use
{schema_madlib}.glm_predict_binomial('help')"
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/glm/multinom.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/glm/multinom.py_in
b/src/ports/postgres/modules/glm/multinom.py_in
index f5469d1..1293f57 100644
--- a/src/ports/postgres/modules/glm/multinom.py_in
+++ b/src/ports/postgres/modules/glm/multinom.py_in
@@ -369,9 +369,6 @@ Currently only logit link functions are supported.
For more details on function usage:
SELECT {schema_madlib}.multinom('usage')
-
-For a small example on using the function:
- SELECT {schema_madlib}.multinom('example')
"""
elif message in ['usage', 'help', '?']:
@@ -422,89 +419,6 @@ A summary table named <out_table>_summary is also created
at the same time, whic
total_rows_processed bigint, -- total numbers of rows processed
total_rows_skipped bigint, -- total numbers of rows skipped
"""
-
- elif message in ['example', 'examples']:
-
- help_string = """
-
-DROP TABLE IF EXISTS test3;
-CREATE TABLE test3 (
- feat1 INTEGER,
- feat2 INTEGER,
- cat INTEGER
-);
-INSERT INTO test3(feat1, feat2, cat) VALUES
-(1,35,1),
-(2,33,0),
-(3,39,1),
-(1,37,1),
-(2,31,1),
-(3,36,0),
-(2,36,1),
-(2,31,1),
-(2,41,1),
-(2,37,1),
-(1,44,1),
-(3,33,2),
-(1,31,1),
-(2,44,1),
-(1,35,1),
-(1,44,0),
-(1,46,0),
-(2,46,1),
-(2,46,2),
-(3,49,1),
-(2,39,0),
-(2,44,1),
-(1,47,1),
-(1,44,1),
-(1,37,2),
-(3,38,2),
-(1,49,0),
-(2,44,0),
-(3,61,2),
-(1,65,2),
-(3,67,1),
-(3,65,2),
-(1,65,2),
-(2,67,2),
-(1,65,2),
-(1,62,2),
-(3,52,2),
-(3,63,2),
-(2,59,2),
-(3,65,2),
-(2,59,0),
-(3,67,2),
-(3,67,2),
-(3,60,2),
-(3,67,2),
-(3,62,2),
-(2,54,2),
-(3,65,2),
-(3,62,2),
-(2,59,2),
-(3,60,2),
-(3,63,2),
-(3,65,2),
-(2,63,1),
-(2,67,2),
-(2,65,2),
-(2,62,2);
-
--- Run the multilogistic regression function.
-DROP TABLE IF EXISTS test3_output;
-DROP TABLE IF EXISTS test3_output_summary;
-SELECT madlib.multinom('test3',
- 'test3_output',
- 'cat',
- 'ARRAY[1, feat1, feat2]',
- '0',
- 'logit'
- );
-
-SELECT * from test3_output;
- """
else:
help_string = "No such option. Use {schema_madlib}.multinom('help')"
@@ -732,9 +646,6 @@ a new set of predictors.
For more details on function usage:
SELECT {schema_madlib}.multinom_predict('usage')
-
-For a small example on using the function:
- SELECT {schema_madlib}.multinom_predict('example')
"""
elif message in ['usage', 'help', '?']:
@@ -757,14 +668,6 @@ SELECT {schema_madlib}.multinom_predict(
The output is a table with one column which gives the predicted category when
predict_type
is response and probability when predict_type is probability.
"""
- elif message in ['example', 'examples']:
- help_string = """
--- run the training example first
-ALTER TABLE test3 ADD COLUMN id SERIAL;
-DROP TABLE IF EXISTS test3_predict;
-SELECT multinom_predict('test3_out', 'test3', 'test3_predict', 'response',
'id');
-SELECT * FROM test3_predict;
- """
else:
help_string = "No such option. Use
{schema_madlib}.multinom_predict('help')"
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/glm/ordinal.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/glm/ordinal.py_in
b/src/ports/postgres/modules/glm/ordinal.py_in
index ffa7ccf..bbc33a7 100644
--- a/src/ports/postgres/modules/glm/ordinal.py_in
+++ b/src/ports/postgres/modules/glm/ordinal.py_in
@@ -360,9 +360,6 @@ Currently logit and probit link functions are supported.
For more details on function usage:
SELECT {schema_madlib}.ordinal('usage')
-
-For a small example on using the function:
- SELECT {schema_madlib}.ordinal('example')
"""
elif message in ['usage', 'help', '?']:
@@ -415,89 +412,6 @@ A summary table named <out_table>_summary is also created
at the same time, whic
total_rows_processed bigint, -- total numbers of rows processed
total_rows_skipped bigint, -- total numbers of rows skipped
"""
-
- elif message in ['example', 'examples']:
-
- help_string = """
-
-DROP TABLE IF EXISTS test3;
-CREATE TABLE test3 (
- feat1 INTEGER,
- feat2 INTEGER,
- cat INTEGER
-);
-INSERT INTO test3(feat1, feat2, cat) VALUES
-(1,35,1),
-(2,33,0),
-(3,39,1),
-(1,37,1),
-(2,31,1),
-(3,36,0),
-(2,36,1),
-(2,31,1),
-(2,41,1),
-(2,37,1),
-(1,44,1),
-(3,33,2),
-(1,31,1),
-(2,44,1),
-(1,35,1),
-(1,44,0),
-(1,46,0),
-(2,46,1),
-(2,46,2),
-(3,49,1),
-(2,39,0),
-(2,44,1),
-(1,47,1),
-(1,44,1),
-(1,37,2),
-(3,38,2),
-(1,49,0),
-(2,44,0),
-(3,61,2),
-(1,65,2),
-(3,67,1),
-(3,65,2),
-(1,65,2),
-(2,67,2),
-(1,65,2),
-(1,62,2),
-(3,52,2),
-(3,63,2),
-(2,59,2),
-(3,65,2),
-(2,59,0),
-(3,67,2),
-(3,67,2),
-(3,60,2),
-(3,67,2),
-(3,62,2),
-(2,54,2),
-(3,65,2),
-(3,62,2),
-(2,59,2),
-(3,60,2),
-(3,63,2),
-(3,65,2),
-(2,63,1),
-(2,67,2),
-(2,65,2),
-(2,62,2);
-
--- Run the ordinal logistic regression function.
-DROP TABLE IF EXISTS test3_output;
-DROP TABLE IF EXISTS test3_output_summary;
-SELECT madlib.ordinal('test3',
- 'test3_output',
- 'cat',
- 'ARRAY[feat1, feat2]',
- '0<1<2',
- 'logit'
- );
-
-SELECT * from test3_output;
- """
else:
help_string = "No such option. Use {schema_madlib}.ordinal('help')"
@@ -818,9 +732,6 @@ a new set of predictors.
For more details on function usage:
SELECT {schema_madlib}.ordinal_predict('usage')
-
-For a small example on using the function:
- SELECT {schema_madlib}.ordinal_predict('example')
"""
elif message in ['usage', 'help', '?']:
@@ -842,14 +753,6 @@ SELECT {schema_madlib}.ordinal_predict(
The output is a table with one column which gives the predicted category when
predict_type
is response and probability when predict_type is probability.
"""
- elif message in ['example', 'examples']:
- help_string = """
--- run the training example first
-ALTER TABLE test3 ADD COLUMN id SERIAL;
-DROP TABLE IF EXISTS test3_predict;
-SELECT ordinal_predict('test3_out', 'test3', 'test3_predict', 'probability');
-SELECT * FROM test3_predict;
- """
else:
help_string = "No such option. Use
{schema_madlib}.ordinal_predict('help')"
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/graph/apsp.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/apsp.py_in
b/src/ports/postgres/modules/graph/apsp.py_in
index d0bba00..4da2fd2 100644
--- a/src/ports/postgres/modules/graph/apsp.py_in
+++ b/src/ports/postgres/modules/graph/apsp.py_in
@@ -660,78 +660,6 @@ every group and has the following columns:
- path (ARRAY) : The shortest path from the source vertex to the
destination vertex.
"""
- elif message.lower() in ("example", "examples"):
- help_string = """
-----------------------------------------------------------------------------
- EXAMPLES
-----------------------------------------------------------------------------
--- Create a graph, represented as vertex and edge tables.
-DROP TABLE IF EXISTS vertex,edge,out,out_summary,out_path;
-CREATE TABLE vertex(
- id INTEGER
- );
-CREATE TABLE edge(
- src INTEGER,
- dest INTEGER,
- weight DOUBLE PRECISION
-);
-
-INSERT INTO vertex VALUES
-(0),
-(1),
-(2),
-(3),
-(4),
-(5),
-(6),
-(7)
-;
-INSERT INTO edge VALUES
-(0, 1, 1),
-(0, 2, 1),
-(0, 4, 10),
-(1, 2, 2),
-(1, 3, 10),
-(2, 3, 1),
-(2, 5, 1),
-(2, 6, 3),
-(3, 0, 1),
-(4, 0, -2),
-(5, 6, 1),
-(6, 7, 1)
-;
-
--- Compute the apsp:
-DROP TABLE IF EXISTS out;
-SELECT madlib.graph_apsp(
- 'vertex', -- Vertex table
- 'id', -- Vertix id column
- 'edge', -- Edge table
- 'src=src, dest=dest, weight=weight', -- Comma delimited string of edge
arguments
- 'out' -- Output table of apsp
-);
--- View the apsp costs for every vertex:
-SELECT * FROM out ORDER BY src, dest;
-
--- View the actual shortest path for a vertex:
-SELECT graph_apsp_get_path('out',0, 5,'out_path');
-SELECT * FROM out_path;
-
--- Create a graph with 2 groups:
-DROP TABLE IF EXISTS edge_gr;
-CREATE TABLE edge_gr AS
-(
- SELECT *, 0 AS grp FROM edge
- UNION
- SELECT *, 1 AS grp FROM edge WHERE src < 6 AND dest < 6
-);
-INSERT INTO edge_gr VALUES
-(4,5,-20,1);
-
--- Find apsp for all groups:
-DROP TABLE IF EXISTS out_gr, out_gr_summary;
-SELECT graph_apsp('vertex',NULL,'edge_gr',NULL,'out_gr','grp');
-"""
else:
help_string = "No such option. Use {schema_madlib}.graph_apsp()"
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/graph/bfs.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/bfs.py_in
b/src/ports/postgres/modules/graph/bfs.py_in
index 0504d91..f0c07ac 100644
--- a/src/ports/postgres/modules/graph/bfs.py_in
+++ b/src/ports/postgres/modules/graph/bfs.py_in
@@ -428,64 +428,6 @@ grouping columns):
source_vertex. Will use 'parent' for column naming. For the
case where vertex_id = source_vertex, the value for parent is
NULL.
"""
- elif message.lower() in ("example", "examples"):
- help_string = """
-----------------------------------------------------------------------------
- EXAMPLES
-----------------------------------------------------------------------------
--- Create a graph, represented as vertex and edge tables.
-DROP TABLE IF EXISTS vertex, edge;
-CREATE TABLE vertex(
- id INTEGER
- );
-CREATE TABLE edge(
- src INTEGER,
- dest INTEGER
- );
-INSERT INTO vertex VALUES
-(0),
-(1),
-(2),
-(3),
-(4),
-(5),
-(6),
-(7),
-(8),
-(9),
-(10),
-(11)
-;
-INSERT INTO edge VALUES
-(0, 5),
-(1, 0),
-(1, 3),
-(2, 6),
-(3, 4),
-(3, 5),
-(4, 2),
-(8, 9),
-(9, 10),
-(9, 11),
-(10, 8)
-;
-
--- Traverse undirected graph from vertex 3:
-DROP TABLE IF EXISTS out, out_summary;
-SELECT madlib.graph_bfs(
- 'vertex', -- Vertex table
- NULL, -- Vertix id column (NULL means use
default naming)
- 'edge', -- Edge table
- NULL, -- Edge arguments (NULL means use
default naming)
- 3, -- Source vertex for BFS
- 'out' -- Output table of nodes reachable
from source_vertex
- );
- -- Default values used for the other arguments
-SELECT * FROM out ORDER BY dist,id;
-
-SELECT * FROM out_summary;
-
-"""
else:
help_string = "No such option. Use {schema_madlib}.graph_bfs()"
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/graph/hits.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/hits.py_in
b/src/ports/postgres/modules/graph/hits.py_in
index 5299a46..23b6b1c 100644
--- a/src/ports/postgres/modules/graph/hits.py_in
+++ b/src/ports/postgres/modules/graph/hits.py_in
@@ -525,87 +525,7 @@ number of iterations required for convergence. It is named
by adding the
suffix '_summary' to the 'out_table' parameter.
"""
else:
- if message is not None and \
- message.lower() in ("example", "examples"):
- help_string = """
-----------------------------------------------------------------------------
- EXAMPLES
-----------------------------------------------------------------------------
--- Create a graph, represented as vertex and edge tables.
-DROP TABLE IF EXISTS vertex, edge;
-CREATE TABLE vertex(
- id INTEGER
- );
-CREATE TABLE edge(
- src INTEGER,
- dest INTEGER,
- user_id INTEGER
- );
-INSERT INTO vertex VALUES
-(0),
-(1),
-(2),
-(3),
-(4),
-(5),
-(6);
-INSERT INTO edge VALUES
-(0, 1, 1),
-(0, 2, 1),
-(0, 4, 1),
-(1, 2, 1),
-(1, 3, 1),
-(2, 3, 1),
-(2, 5, 1),
-(2, 6, 1),
-(3, 0, 1),
-(4, 0, 1),
-(5, 6, 1),
-(6, 3, 1),
-(0, 1, 2),
-(0, 2, 2),
-(0, 4, 2),
-(1, 2, 2),
-(1, 3, 2),
-(2, 3, 2),
-(3, 0, 2),
-(4, 0, 2),
-(5, 6, 2),
-(6, 3, 2);
-
--- Compute the HITS score:
-DROP TABLE IF EXISTS hits_out, hits_out_summary;
-SELECT {schema_madlib}.hits(
- 'vertex', -- Vertex table
- 'id', -- Vertex id column
- 'edge', -- Edge table
- 'src=src, dest=dest', -- Comma delimited string of edge
arguments
- 'hits_out'); -- Output table of HITS
--- View the authority and hub scores of all vertices, ordered by their id.
-SELECT * FROM hits_out ORDER BY id;
-
--- Compute the HITS score of nodes associated with each user:
-DROP TABLE IF EXISTS hits_out, hits_out_summary;
-SELECT {schema_madlib}.hits(
- 'vertex', -- Vertex table
- 'id', -- Vertix id column
- 'edge', -- Edge table
- 'src=src, dest=dest', -- Comma delimted string of edge arguments
- 'hits_out', -- Output table of HITS
- NULL, -- Default max_iter
- NULL, -- Threshold
- 'user_id'); -- Grouping column
-
--- View the authority and hub scores of all vertices, ordered by the grouping
column.
-SELECT * FROM hits_out ORDER BY user_id, id;
-
--- View the summary table to find the number of iterations required for
--- convergence.
-SELECT * FROM hits_out_summary;
-
-"""
- else:
- help_string = """
+ help_string = """
----------------------------------------------------------------------------
SUMMARY
----------------------------------------------------------------------------
@@ -614,10 +534,6 @@ all the vertices in the graph.
--
For an overview on usage, run:
SELECT {schema_madlib}.hits('usage');
-
-For some examples, run:
-SELECT {schema_madlib}.hits('example')
---
"""
return help_string.format(schema_madlib=schema_madlib)
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/graph/measures.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/measures.py_in
b/src/ports/postgres/modules/graph/measures.py_in
index eb79adb..1f07522 100644
--- a/src/ports/postgres/modules/graph/measures.py_in
+++ b/src/ports/postgres/modules/graph/measures.py_in
@@ -368,58 +368,6 @@ def graph_vertex_degrees(schema_madlib, vertex_table,
vertex_id, edge_table,
# All help functions
# -----------------------------------------------------------------------
-
-CREATE_GRAPH_EXAMPLE = """
--- Create a graph, represented as vertex and edge tables.
-DROP TABLE IF EXISTS vertex,edge,out,out_summary,out_path;
-CREATE TABLE vertex(
- id INTEGER
- );
-CREATE TABLE edge(
- src INTEGER,
- dest INTEGER,
- weight DOUBLE PRECISION
-);
-
-INSERT INTO vertex VALUES
-(0),
-(1),
-(2),
-(3),
-(4),
-(5),
-(6),
-(7)
-;
-INSERT INTO edge VALUES
-(0, 1, 1),
-(0, 2, 1),
-(0, 4, 10),
-(1, 2, 2),
-(1, 3, 10),
-(2, 3, 1),
-(2, 5, 1),
-(2, 6, 3),
-(3, 0, 1),
-(4, 0, -2),
-(5, 6, 1),
-(6, 7, 1)
-;
-"""
-
-COMPUTE_APSP_EXAMPLE = """
--- Compute the apsp:
-DROP TABLE IF EXISTS out;
-SELECT graph_apsp(
- 'vertex', -- Vertex table
- 'id', -- Vertix id column
- 'edge', -- Edge table
- 'src=src, dest=dest, weight=weight', -- Comma delimited string of edge
arguments
- 'out' -- Output table of apsp
-);
-"""
-
-
def graph_closeness_help(schema_madlib, message, **kwargs):
intro = """
@@ -461,25 +409,10 @@ the following columns (in addition to the grouping
columns):
- k_degree : Total number of reachable vertices.
"""
- elif message.lower() in ['example', 'examples']:
- help_string = """
-----------------------------------------------------------------------------
- EXAMPLES
-----------------------------------------------------------------------------
-{create_graph_example}
-{compute_apsp_example}
-
--# Compute the closeness measure for all nodes:
-DROP TABLE IF EXISTS out_closeness;
-SELECT {schema_madlib}.graph_closeness('out_apsp', 'out_closeness');
-SELECT * FROM out_closeness;
- """
else:
help_string = "No such option. Use {schema_madlib}.graph_closeness()"
- return help_string.format(schema_madlib=schema_madlib,
- create_graph_example=CREATE_GRAPH_EXAMPLE,
- compute_apsp_example=COMPUTE_APSP_EXAMPLE)
+ return help_string.format(schema_madlib=schema_madlib)
# -------------------------------------------------------------------------
@@ -511,25 +444,10 @@ SELECT {schema_madlib}.graph_diameter(
It contains a row for every group, the diameter value and the two vertices
that are the farthest apart.
"""
- elif message.lower() in ['example', 'examples']:
- help_string = """
-----------------------------------------------------------------------------
- EXAMPLES
-----------------------------------------------------------------------------
-{create_graph_example}
-{compute_apsp_example}
-
--# Compute the diameter measure for the graph:
-DROP TABLE IF EXISTS out_diameter;
-SELECT {schema_madlib}.graph_diameter('out_apsp', 'out_diameter');
-SELECT * FROM out_diameter;
- """
else:
help_string = "No such option. Use {schema_madlib}.graph_diameter()"
- return help_string.format(schema_madlib=schema_madlib,
- create_graph_example=CREATE_GRAPH_EXAMPLE,
- compute_apsp_example=COMPUTE_APSP_EXAMPLE)
+ return help_string.format(schema_madlib=schema_madlib)
# -------------------------------------------------------------------------
@@ -562,25 +480,10 @@ SELECT {schema_madlib}.graph_avg_path_length(
----------------------------------------------------------------------------
It contains a row for every group, and the average path value.
"""
- elif message.lower() in ['example', 'examples']:
- help_string = """
-----------------------------------------------------------------------------
- EXAMPLES
-----------------------------------------------------------------------------
-{create_graph_example}
-{compute_apsp_example}
-
--# Compute the average path length for the graph:
-DROP TABLE IF EXISTS out_avg_path_length;
-SELECT {schema_madlib}.graph_avg_path_length('out_apsp',
'out_avg_path_length');
-SELECT * FROM out_avg_path_length;
- """
else:
help_string = "No such option. Use
{schema_madlib}.graph_avg_path_length()"
- return help_string.format(schema_madlib=schema_madlib,
- create_graph_example=CREATE_GRAPH_EXAMPLE,
- compute_apsp_example=COMPUTE_APSP_EXAMPLE)
+ return help_string.format(schema_madlib=schema_madlib)
# -------------------------------------------------------------------------
@@ -618,26 +521,9 @@ It contains a row for every vertex of every group and has
the following columns
- outdegree : Number of outgoing edges from the vertex.
"""
- elif message.lower() in ['example', 'examples']:
- help_string = """
-----------------------------------------------------------------------------
- EXAMPLES
-----------------------------------------------------------------------------
-{create_graph_example}
-
-DROP TABLE IF EXISTS degrees;
-SELECT {schema_madlib}.graph_vertex_degrees(
- 'vertex', -- Vertex table
- 'id', -- Vertix id column (NULL means use default naming)
- 'edge', -- Edge table
- 'src=src, dest=dest, weight=weight',
- 'degrees'); -- Output table of shortest paths
-SELECT * FROM degrees ORDER BY id;
- """
else:
help_string = "No such option. Use
{schema_madlib}.graph_vertex_degrees()"
return help_string.format(schema_madlib=schema_madlib,
- create_graph_example=CREATE_GRAPH_EXAMPLE,
graph_usage=usage_text)
# -------------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/graph/pagerank.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/pagerank.py_in
b/src/ports/postgres/modules/graph/pagerank.py_in
index 71cddd2..e39d216 100644
--- a/src/ports/postgres/modules/graph/pagerank.py_in
+++ b/src/ports/postgres/modules/graph/pagerank.py_in
@@ -765,110 +765,7 @@ number of iterations required for convergence. It is
named by adding the
suffix '_summary' to the 'out_table' parameter.
"""
else:
- if message is not None and \
- message.lower() in ("example", "examples"):
- help_string = """
-----------------------------------------------------------------------------
- EXAMPLES
-----------------------------------------------------------------------------
--- Create a graph, represented as vertex and edge tables.
-DROP TABLE IF EXISTS vertex, edge;
-CREATE TABLE vertex(
- id INTEGER
- );
-CREATE TABLE edge(
- src INTEGER,
- dest INTEGER,
- user_id INTEGER
- );
-INSERT INTO vertex VALUES
-(0),
-(1),
-(2),
-(3),
-(4),
-(5),
-(6);
-INSERT INTO edge VALUES
-(0, 1, 1),
-(0, 2, 1),
-(0, 4, 1),
-(1, 2, 1),
-(1, 3, 1),
-(2, 3, 1),
-(2, 5, 1),
-(2, 6, 1),
-(3, 0, 1),
-(4, 0, 1),
-(5, 6, 1),
-(6, 3, 1),
-(0, 1, 2),
-(0, 2, 2),
-(0, 4, 2),
-(1, 2, 2),
-(1, 3, 2),
-(2, 3, 2),
-(3, 0, 2),
-(4, 0, 2),
-(5, 6, 2),
-(6, 3, 2);
-
--- Compute the PageRank:
-DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
-SELECT madlib.pagerank(
- 'vertex', -- Vertex table
- 'id', -- Vertix id column
- 'edge', -- Edge table
- 'src=src, dest=dest', -- Comma delimted string of edge arguments
- 'pagerank_out'); -- Output table of PageRank
-
--- View the PageRank of all vertices, sorted by their scores.
-SELECT * FROM pagerank_out ORDER BY pagerank DESC;
--- View the summary table to find the number of iterations required for
--- convergence.
-SELECT * FROM pagerank_out_summary;
-
--- Compute PageRank of nodes associated with each user:
-DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
-SELECT madlib.pagerank(
- 'vertex', -- Vertex table
- 'id', -- Vertix id column
- 'edge', -- Edge table
- 'src=src, dest=dest', -- Comma delimted string of edge arguments
- 'pagerank_out', -- Output table of PageRank
- NULL, -- Default damping factor
- NULL, -- Default max_iter
- 0.00000001, -- Threshold
- 'user_id'); -- Grouping column
-
--- View the PageRank of all vertices, sorted by their scores.
-SELECT * FROM pagerank_out ORDER BY user_id, pagerank DESC;
--- View the summary table to find the number of iterations required for
--- convergence for each group.
-SELECT * FROM pagerank_out_summary;
-
--- Compute the Personalized PageRank:
-DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary;
-SELECT madlib.pagerank(
- 'vertex', -- Vertex table
- 'id', -- Vertix id column
- 'edge', -- Edge table
- 'src=src, dest=dest', -- Comma delimted string of edge
arguments
- 'pagerank_out', -- Output table of PageRank
- NULL, -- Default damping factor (0.85)
- NULL, -- Default max iters (100)
- NULL, -- Default Threshold
- NULL, -- No Grouping
- ARRAY[2,4]); -- Personlized Nodes
-
--- View the Personalized PageRank of all vertices, sorted by their scores.
-SELECT * FROM pagerank_out ORDER BY pagerank DESC;
--- View the summary table to find the number of iterations required for
--- convergence.
-SELECT * FROM pagerank_out_summary;
-"""
- else:
- help_string = """
+ help_string = """
----------------------------------------------------------------------------
SUMMARY
----------------------------------------------------------------------------
@@ -877,10 +774,6 @@ the vertices in the graph.
--
For an overview on usage, run:
SELECT {schema_madlib}.pagerank('usage');
-
-For some examples, run:
-SELECT {schema_madlib}.pagerank('example')
---
"""
return help_string.format(schema_madlib=schema_madlib)
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/graph/sssp.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/sssp.py_in
b/src/ports/postgres/modules/graph/sssp.py_in
index a78eea4..d7f2c80 100644
--- a/src/ports/postgres/modules/graph/sssp.py_in
+++ b/src/ports/postgres/modules/graph/sssp.py_in
@@ -627,79 +627,6 @@ every group and has the following columns:
- path (ARRAY) : The shortest path from the source vertex (as specified
in the SSSP execution) to the destination vertex.
"""
- elif message.lower() in ("example", "examples"):
- help_string = """
-----------------------------------------------------------------------------
- EXAMPLES
-----------------------------------------------------------------------------
--- Create a graph, represented as vertex and edge tables.
-DROP TABLE IF EXISTS vertex,edge,out,out_summary,out_path;
-CREATE TABLE vertex(
- id INTEGER
- );
-CREATE TABLE edge(
- src INTEGER,
- dest INTEGER,
- weight DOUBLE PRECISION
-);
-
-INSERT INTO vertex VALUES
-(0),
-(1),
-(2),
-(3),
-(4),
-(5),
-(6),
-(7)
-;
-INSERT INTO edge VALUES
-(0, 1, 1),
-(0, 2, 1),
-(0, 4, 10),
-(1, 2, 2),
-(1, 3, 10),
-(2, 3, 1),
-(2, 5, 1),
-(2, 6, 3),
-(3, 0, 1),
-(4, 0, -2),
-(5, 6, 1),
-(6, 7, 1)
-;
-
--- Compute the SSSP:
-DROP TABLE IF EXISTS out;
-SELECT madlib.graph_sssp(
- 'vertex', -- Vertex table
- 'id', -- Vertix id column
- 'edge', -- Edge table
- 'src=src, dest=dest, weight=weight', -- Comma delimted string of edge
arguments
- 0, -- The source vertex
- 'out' -- Output table of SSSP
-);
--- View the SSSP costs for every vertex:
-SELECT * FROM out ORDER BY id;
-
--- View the actual shortest path for a vertex:
-SELECT graph_sssp_get_path('out',5,'out_path');
-SELECT * FROM out_path;
-
--- Create a graph with 2 groups:
-DROP TABLE IF EXISTS edge_gr;
-CREATE TABLE edge_gr AS
-(
- SELECT *, 0 AS grp FROM edge
- UNION
- SELECT *, 1 AS grp FROM edge WHERE src < 6 AND dest < 6
-);
-INSERT INTO edge_gr VALUES
-(4,5,-20,1);
-
--- Find SSSP for all groups:
-DROP TABLE IF EXISTS out_gr, out_gr_summary;
-SELECT graph_sssp('vertex',NULL,'edge_gr',NULL,0,'out_gr','grp');
-"""
else:
help_string = "No such option. Use {schema_madlib}.graph_sssp()"
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/graph/wcc.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/wcc.py_in
b/src/ports/postgres/modules/graph/wcc.py_in
index 09f8fea..de1af27 100644
--- a/src/ports/postgres/modules/graph/wcc.py_in
+++ b/src/ports/postgres/modules/graph/wcc.py_in
@@ -652,126 +652,7 @@ def wcc_help(schema_madlib, message, **kwargs):
-- of number of components.
);"""
else:
- if message is not None and \
- message.lower() in ("example", "examples"):
- help_string = """
-----------------------------------------------------------------------------
- EXAMPLES
-----------------------------------------------------------------------------
--- Create a graph, represented as vertex and edge tables.
-DROP TABLE IF EXISTS vertex, edge;
-CREATE TABLE vertex(
- id INTEGER
-);
-CREATE TABLE edge(
- src INTEGER,
- dest INTEGER,
- user_id INTEGER
-);
-INSERT INTO vertex VALUES
-(0),
-(1),
-(2),
-(3),
-(4),
-(5),
-(6),
-(10),
-(11),
-(12),
-(13),
-(14),
-(15),
-(16);
-INSERT INTO edge VALUES
-(0, 1, 1),
-(0, 2, 1),
-(1, 2, 1),
-(1, 3, 1),
-(2, 3, 1),
-(2, 5, 1),
-(2, 6, 1),
-(3, 0, 1),
-(5, 6, 1),
-(6, 3, 1),
-(10, 11, 2),
-(10, 12, 2),
-(11, 12, 2),
-(11, 13, 2),
-(12, 13, 2),
-(13, 10, 2),
-(15, 16, 2),
-(15, 14, 2);
-
--- Find all weakly connected components in the graph:
-DROP TABLE IF EXISTS wcc_out;
-SELECT madlib.weakly_connected_components(
- 'vertex', -- Vertex table
- 'id', -- Vertix id column
- 'edge', -- Edge table
- 'src=src, dest=dest', -- Comma delimted string of edge arguments
- 'wcc_out'); -- Output table of weakly connected components
-
--- View the component ID associated with each vertex in the graph:
-SELECT * FROM wcc_out ORDER BY component_id;
-
--- Find all weakly connected components associated with each user, using the
--- grouping feature:
-DROP TABLE IF EXISTS wcc_out;
-SELECT madlib.weakly_connected_components(
- 'vertex', -- Vertex table
- 'id', -- Vertix id column
- 'edge', -- Edge table
- 'src=src, dest=dest', -- Comma delimted string of edge arguments
- 'wcc_out', -- Output table of weakly connected components
- 'user_id'); -- Grouping column
-
--- View the component ID associated with each vertex within the sub-graph
--- associated with each user:
-SELECT * FROM wcc_out ORDER BY user_id, component_id;
-
--- Retrieve the largest connected component
-DROP TABLE IF EXISTS largest_cpt_table;
-SELECT madlib.graph_wcc_largest_cpt(
- 'wcc_out', -- WCC's output table
- 'largest_cpt_table'); -- output table with largest component IDs
-DROP TABLE largest_cpt_table;
-
--- There are several helper functions to use after wcc_out is obtained:
--- Retrieve Histogram of Vertices Per Connected Component
-DROP TABLE IF EXISTS histogram_table;
-SELECT madlib.graph_wcc_histogram(
- 'wcc_out', -- WCC's output table
- 'histogram_table'); -- output table containing the histogram of
vertices
-DROP TABLE histogram_table;
-
--- Check if Two Vertices Belong to the Same Component
-DROP TABLE IF EXISTS vc_table;
-SELECT madlib.graph_wcc_vertex_check(
- 'wcc_out', -- WCC's output table
- '14,15', -- Pair of vertex IDs
- 'vc_table'); -- output table containing components that contain
the
- -- two vertices
-DROP TABLE vc_table;
-
--- Retrieve All Vertices Reachable from a Vertex
-DROP TABLE IF EXISTS reach_table;
-SELECT madlib.graph_wcc_reachable_vertices(
- 'wcc_out', -- WCC's output table
- '0', -- source vertex
- 'reach_table'); -- output table containing all vertices
reachable from
- -- source vertex
-DROP TABLE reach_table;
-
--- Count of Connected Components
-DROP TABLE IF EXISTS count_table;
-SELECT madlib.graph_wcc_num_cpts(
- 'wcc_out', -- WCC's output table
- 'count_table'); -- output table containing number of components
per group
-DROP TABLE count_table;
-"""
- else:
- help_string = """
+ help_string = """
----------------------------------------------------------------------------
SUMMARY
----------------------------------------------------------------------------
@@ -782,10 +663,6 @@ connected component is also a strongly connected component.
--
For an overview on usage, run:
SELECT {schema_madlib}.weakly_connected_components('usage');
-
-For some examples, run:
-SELECT {schema_madlib}.weakly_connected_components('example')
---
"""
return help_string.format(schema_madlib=schema_madlib)
http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/knn/knn.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/knn/knn.py_in
b/src/ports/postgres/modules/knn/knn.py_in
index cfd93d9..c9ae918 100644
--- a/src/ports/postgres/modules/knn/knn.py_in
+++ b/src/ports/postgres/modules/knn/knn.py_in
@@ -342,135 +342,7 @@ prediction The output of KNN- label in case of
classification, average
k_nearest_neighbours The list of k-nearest neighbors that were used in the
voting/averaging.
"""
else:
- if message is not None and \
- message.lower() in ("example", "examples"):
- help_string = """
-----------------------------------------------------------------------------
- EXAMPLES
-----------------------------------------------------------------------------
--- Prepare some training data for classification:
-DROP TABLE IF EXISTS knn_train_data;
-CREATE TABLE knn_train_data (
- id integer,
- data integer[],
- label integer -- Integer label means for classification
- );
-INSERT INTO knn_train_data VALUES
-(1, '{{1,1}}', 1),
-(2, '{{2,2}}', 1),
-(3, '{{3,3}}', 1),
-(4, '{{4,4}}', 1),
-(5, '{{4,5}}', 1),
-(6, '{{20,50}}', 0),
-(7, '{{10,31}}', 0),
-(8, '{{81,13}}', 0),
-(9, '{{1,111}}', 0);
-
--- Prepare some training data for regression:
-DROP TABLE IF EXISTS knn_train_data_reg;
-CREATE TABLE knn_train_data_reg (
- id integer,
- data integer[],
- label float -- Float label means for regression
- );
-INSERT INTO knn_train_data_reg VALUES
-(1, '{{1,1}}', 1.0),
-(2, '{{2,2}}', 1.0),
-(3, '{{3,3}}', 1.0),
-(4, '{{4,4}}', 1.0),
-(5, '{{4,5}}', 1.0),
-(6, '{{20,50}}', 0.0),
-(7, '{{10,31}}', 0.0),
-(8, '{{81,13}}', 0.0),
-(9, '{{1,111}}', 0.0);
-
--- Prepare some testing data:
-DROP TABLE IF EXISTS knn_test_data;
-CREATE TABLE knn_test_data (
- id integer,
- data integer[]
- );
-INSERT INTO knn_test_data VALUES
-(1, '{{2,1}}'),
-(2, '{{2,6}}'),
-(3, '{{15,40}}'),
-(4, '{{12,1}}'),
-(5, '{{2,90}}'),
-(6, '{{50,45}}');
-
--- Run KNN for classification:
-DROP TABLE IF EXISTS knn_result_classification;
-SELECT * FROM {schema_madlib}.knn(
- 'knn_train_data', -- Table of training data
- 'data', -- Col name of training data
- 'id', -- Col name of id in train data
- 'label', -- Training labels
- 'knn_test_data', -- Table of test data
- 'data', -- Col name of test data
- 'id', -- Col name of id in test data
- 'knn_result_classification', -- Output table
- 3, -- Number of nearest neighbors
- True, -- True to list nearest-neighbors by id
- 'madlib.squared_dist_norm2', -- Distance function
- False -- False for not using weighted
average
- );
-SELECT * from knn_result_classification ORDER BY id;
-
-Note that the nearest neighbors are sorted from closest
-to furthest from the corresponding test point.
-
--- Run KNN for regression:
-DROP TABLE IF EXISTS knn_result_regression;
-SELECT * FROM {schema_madlib}.knn(
- 'knn_train_data_reg', -- Table of training data
- 'knn_test_data', -- Table of test data
- 'data', -- Col name of test data
- 'id', -- Col name of id in test data
- 'knn_result_regression', -- Output table
- 3, -- Number of nearest neighbors
- True, -- True to list nearest-neighbors by id
- 'madlib.dist_norm2', -- Distance function
- False -- False for not using weighted average
- );
-SELECT * FROM knn_result_regression ORDER BY id;
-
--- List nearest neighbors only, without doing classification
-or regression:
-DROP TABLE IF EXISTS knn_result_list_neighbors;
-SELECT * FROM {schema_madlib}.knn(
- 'knn_train_data_reg', -- Table of training data
- 'data', -- Col name of training data
- 'id', -- Col Name of id in train data
- NULL, -- NULL training labels means just list
neighbors
- 'knn_test_data', -- Table of test data
- 'data', -- Col name of test data
- 'id', -- Col name of id in test data
- 'knn_result_list_neighbors', -- Output table
- 3 -- Number of nearest neighbors
- );
-SELECT * FROM knn_result_list_neighbors ORDER BY id;
-
--- Run KNN for classification using weighted average:
-DROP TABLE IF EXISTS knn_result_classification;
-SELECT * FROM {schema_madlib}.knn(
- 'knn_train_data', -- Table of training data
- 'data', -- Col name of training data
- 'id', -- Col name of id in train data
- 'label', -- Training labels
- 'knn_test_data', -- Table of test data
- 'data', -- Col name of test data
- 'id', -- Col name of id in test data
- 'knn_result_classification', -- Output table
- 3, -- Number of nearest neighbors
- True, -- True to list nearest-neighbors by id
- 'madlib.squared_dist_norm2', -- Distance function
- True -- Calculation using weighted
average
- );
-SELECT * from knn_result_classification ORDER BY id;
-
-"""
- else:
- help_string = """
+ help_string = """
----------------------------------------------------------------------------
SUMMARY
----------------------------------------------------------------------------
@@ -486,10 +358,6 @@ of k nearest neighbors of the given testing example.
--
For an overview on usage, run:
SELECT {schema_madlib}.knn('usage');
-
-For some examples, run:
-SELECT {schema_madlib}.knn('example')
---
"""
return help_string.format(schema_madlib=schema_madlib)