[1/3] madlib git commit: Documentation: Remove online examples from sql functions.

jingyimei Wed, 15 Aug 2018 12:34:54 -0700

Repository: madlib
Updated Branches:
  refs/heads/master cf5ace944 -> 24a11c1e5



http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/stats/correlation.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/stats/correlation.py_in 
b/src/ports/postgres/modules/stats/correlation.py_in
index 45ff05e..005de75 100644
--- a/src/ports/postgres/modules/stats/correlation.py_in
+++ b/src/ports/postgres/modules/stats/correlation.py_in
@@ -414,57 +414,6 @@ triangle set to NULL. To obtain the result from the 
output_table in this matrix
 format ensure to order the elements using the 'column_position' column.
 
         """.format(schema_madlib=schema_madlib, func=func)
-    elif message is not None and message.lower() in ('example', 'examples'):
-        return """
-DROP TABLE IF EXISTS example_data;
-CREATE TABLE example_data(
-    id SERIAL,
-    outlook text,
-    temperature float8,
-    humidity float8,
-    windy text,
-    class text) ;
-
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('sunny', 85, 85, 'false', E'Dont Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('sunny', 80, 90, 'true', E'Dont Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('overcast', 83, 78, 'false', 'Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('rain', 70, 96, 'false', 'Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('rain', 68, 80, 'false', 'Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('rain', 65, 70, 'true', E'Dont Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('overcast', 64, 65, 'true', 'Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('sunny', 72, 95, 'false', E'Dont Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('sunny', 69, 70, 'false', 'Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('rain', 75, 80, 'false', 'Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('sunny', 75, 70, 'true', 'Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('overcast', 72, 90, 'true', 'Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('overcast', 81, 75, 'false', 'Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES('rain', 71, 80, 'true', E'Dont Play');
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES(NULL, 100, 100, 'true', NULL);
-INSERT INTO example_data(outlook, temperature, humidity, windy, class)
-VALUES(NULL, 110, 100, 'true', NULL);
-
-SELECT madlib.{func}('example_data', 'example_data_output');
-SELECT madlib.{func}('example_data', 'example_data_output', '*');
-SELECT madlib.{func}('example_data', 'example_data_output', 'temperature, 
humidity');
-
--- To get the {func} matrix from output table:
-SELECT * from example_data_output order by column_position;
-         """.format(func=func)
     else:
         if cov:
             return """
@@ -478,9 +427,6 @@ covariance is negative. The sign of the covariance 
therefore shows the tendency
 -------
 For an overview on usage, run:
     SELECT {schema_madlib}.covariance('usage');
--------
-For examples:
-    SELECT {schema_madlib}.covariance('example');
             """.format(schema_madlib=schema_madlib)
         else:
             return """
@@ -492,8 +438,5 @@ perfectly anti-correlated.
 -------
 For an overview on usage, run:
     SELECT {schema_madlib}.correlation('usage');
--------
-For examples:
-    SELECT {schema_madlib}.correlation('example');
             """.format(schema_madlib=schema_madlib)
 # 
------------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/stats/cox_prop_hazards.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/stats/cox_prop_hazards.py_in 
b/src/ports/postgres/modules/stats/cox_prop_hazards.py_in
index 706503a..f73c14f 100644
--- a/src/ports/postgres/modules/stats/cox_prop_hazards.py_in
+++ b/src/ports/postgres/modules/stats/cox_prop_hazards.py_in
@@ -64,8 +64,6 @@ the probability that death has happened before time t.
 
 For more details on function usage:
     SELECT {schema_madlib}.coxph_train('usage')
-For an example on using the function:
-    SELECT {schema_madlib}.coxph_train('example')
 """
 
     elif message in ['usage', 'help', '?']:
@@ -110,55 +108,6 @@ The output summary table is named as 
<output_table>_summary has the following co
                                         due to missing values
 
         """
-
-    elif message in ['example', 'examples']:
-        help_string = """
-DROP TABLE IF EXISTS sample_data;
-CREATE TABLE sample_data (
-    id INTEGER NOT NULL,
-    grp DOUBLE PRECISION,
-    wbc DOUBLE PRECISION,
-    timedeath INTEGER,
-    status BOOLEAN
-);
-
-COPY sample_data FROM STDIN DELIMITER '|';
-  0 |   0 | 1.45 |        35 | t
-  1 |   0 | 1.47 |        34 | t
-  3 |   0 |  2.2 |        32 | t
-  4 |   0 | 1.78 |        25 | t
-  5 |   0 | 2.57 |        23 | t
-  6 |   0 | 2.32 |        22 | t
-  7 |   0 | 2.01 |        20 | t
-  8 |   0 | 2.05 |        19 | t
-  9 |   0 | 2.16 |        17 | t
- 10 |   0 |  3.6 |        16 | t
- 11 |   1 |  2.3 |        15 | t
- 12 |   0 | 2.88 |        13 | t
- 13 |   1 |  1.5 |        12 | t
- 14 |   0 |  2.6 |        11 | t
- 15 |   0 |  2.7 |        10 | t
- 16 |   0 |  2.8 |         9 | t
- 17 |   1 | 2.32 |         8 | t
- 18 |   0 | 4.43 |         7 | t
- 19 |   0 | 2.31 |         6 | t
- 20 |   1 | 3.49 |         5 | t
- 21 |   1 | 2.42 |         4 | t
- 22 |   1 | 4.01 |         3 | t
- 23 |   1 | 4.91 |         2 | t
- 24 |   1 |    5 |         1 | t
-\.
-
-SELECT {schema_madlib}.coxph_train(
-    'sample_data',
-    'sample_cox',
-    'timedeath',
-    'ARRAY[grp,wbc]',
-    'status');
-
-SELECT * FROM sample_cox;
-        """
-
     else:
         help_string = "No such option. Use {schema_madlib}.coxph_train()"
 

http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/summary/summary.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/summary/summary.py_in 
b/src/ports/postgres/modules/summary/summary.py_in
index 1dd6c61..ecd8726 100644
--- a/src/ports/postgres/modules/summary/summary.py_in
+++ b/src/ports/postgres/modules/summary/summary.py_in
@@ -159,43 +159,6 @@ def summary_help_message(schema_madlib, message, **kwargs):
             - most_frequent_values  : Most frequent values
             - mfv_frequencies       : Frequency of the most frequent values
         """.format(madlib=schema_madlib)
-    elif message is not None and message.lower() in ('example', 'examples'):
-        return """
-            DROP TABLE IF EXISTS example_data;
-            CREATE TABLE example_data(
-                id SERIAL,
-                outlook text,
-                temperature float8,
-                humidity float8,
-                windy text,
-                class text) ;
-
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('sunny', 85, 85, 'false', E'Don\\'t Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('sunny', 80, 90, 'true', E'Don\\'t Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('overcast', 83, 78, 'false', 'Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('rain', 70, 96, 'false', 'Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('rain', 68, 80, 'false', 'Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('rain', 65, 70, 'true', E'Don\\'t Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('overcast', 64, 65, 'true', 'Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('sunny', 72, 95, 'false', E'Don\\'t Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('sunny', 69, 70, 'false', 'Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('rain', 75, 80, 'false', 'Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('sunny', 75, 70, 'true', 'Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('overcast', 72, 90, 'true', 'Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('overcast', 81, 75, 'false', 'Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('rain', 71, 80, 'true', E'Don\\'t Play');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES(' ', 100, 100, 'true', ' ');
-            INSERT INTO example_data(outlook, temperature, humidity, windy, 
class) VALUES('', 110, 100, 'true', '');
-
-            SELECT madlib.summary('example_data', 'example_data_output');
-            SELECT madlib.summary('example_data', 'example_data_output', 
'windy');
-            SELECT madlib.summary('example_data', 'example_data_output', 
'windy,humidity');
-            SELECT madlib.summary('example_data', 'example_data_output', 'id', 
'windy');
-            SELECT madlib.summary('example_data', 'example_data_output', NULL, 
NULL, True, True, array[0.1, 0.2, 0.3]);
-            SELECT madlib.summary('example_data', 'example_data_output', NULL, 
NULL, True, True, array[0.1, 0.2, 0.3], 2);
-            SELECT madlib.summary('example_data', 'example_data_output', NULL, 
NULL, True, True, array[0.1, 0.2, 0.3], 2, False);
-            SELECT madlib.summary('example_data', 'example_data_output', NULL, 
NULL, True, True, array[0.1, 0.2, 0.3], 2, False, 2);
-         """
     else:
         return """
             'summary' is a generic function used to produce summary statistics
@@ -204,7 +167,4 @@ def summary_help_message(schema_madlib, message, **kwargs):
             -------
             For an overview on usage, run:
             SELECT {madlib}.summary('usage');
-            -------
-            For an example, run:
-            SELECT {madlib}.summary('example')
             """.format(madlib=schema_madlib)

http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/svm/svm.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/svm/svm.py_in 
b/src/ports/postgres/modules/svm/svm.py_in
index b8780ab..d2d22c4 100644
--- a/src/ports/postgres/modules/svm/svm.py_in
+++ b/src/ports/postgres/modules/svm/svm.py_in
@@ -516,9 +516,6 @@ def svm_one_class_help(schema_madlib, message, is_svc, 
**kwargs):
 
     For more details on function usage:
         SELECT {schema_madlib}.{method}('usage')
-
-    For a small example on using the function:
-        SELECT {schema_madlib}.{method}('example')
         """.format(**args)
 
     usage = """
@@ -627,89 +624,11 @@ def svm_one_class_help(schema_madlib, message, is_svc, 
**kwargs):
     gaussian_usage = get_svc_gaussian_usage_string()
     poly_usage = get_svc_poly_usage_string()
 
-    example_usage = """
-    ---------------------------------------------------------------------------
-                                  EXAMPLES
-    ---------------------------------------------------------------------------
-    - Create an input data set.
-
-    CREATE TABLE houses (id INT, tax INT, bedroom INT, bath FLOAT, price INT,
-                size INT, lot INT);
-    COPY houses FROM STDIN WITH DELIMITER '|';
-      1 |  590 |       2 |    1 |  50000 |  770 | 22100
-      2 | 1050 |       3 |    2 |  85000 | 1410 | 12000
-      3 |   20 |       3 |    1 |  22500 | 1060 |  3500
-      4 |  870 |       2 |    2 |  90000 | 1300 | 17500
-      5 | 1320 |       3 |    2 | 133000 | 1500 | 30000
-      6 | 1350 |       2 |    1 |  90500 |  820 | 25700
-      7 | 2790 |       3 |  2.5 | 260000 | 2130 | 25000
-      8 |  680 |       2 |    1 | 142500 | 1170 | 22000
-      9 | 1840 |       3 |    2 | 160000 | 1500 | 19000
-     10 | 3680 |       4 |    2 | 240000 | 2790 | 20000
-     11 | 1660 |       3 |    1 |  87000 | 1030 | 17500
-     12 | 1620 |       3 |    2 | 118600 | 1250 | 20000
-     13 | 3100 |       3 |    2 | 140000 | 1760 | 38000
-     14 | 2070 |       2 |    3 | 148000 | 1550 | 14000
-     15 |  650 |       3 |  1.5 |  65000 | 1450 | 12000
-    \.
-
-    - Generate a non-linear one-class SVM using a Gaussian kernel. We
-      specify the initial step size and maximum number of iterations to run.
-      As part of the kernel parameter, we choose 10 as the dimension of the
-      space where we train SVM. A larger number will lead to a more powerful
-      model but run the risk of overfitting. As a result, the model will be a
-      10 dimensional vector.
-
-    select {schema_madlib}.svm_one_class('houses',
-                                'houses_one_class_gaussian',
-                                'ARRAY[1,tax,bedroom,bath,size,lot,price]',
-                                'gaussian',
-                                'gamma=0.01,n_components=10',
-                                NULL,
-                                'max_iter=250, init_stepsize=100,lambda=0.9'
-                                );
-
-    - Create a test data set.
-    DROP TABLE IF EXISTS houses_novelty_test;
-    CREATE TABLE houses_novelty_test (id INT, tax INT, bedroom INT, bath 
FLOAT, price INT,
-                size INT, lot INT);
-    COPY houses_novelty_test FROM STDIN WITH DELIMITER '|';
-      1 |  33590 |       12 |    11 |  5000000 |  12770 | 221100
-      2 | 1050 |       31 |    21 |  85000000 | 141210 | 120010
-      3 |   233330 |     13 |    11 |  22500000 | 112060 |  351100
-      4 |  833370 |       12 |    12 |  9000000 | 130120 | 1751100
-      5 | 132330 |       31 |    12 | 133000000 | 150120 | 30011100
-      6 | 135330 |       21 |    11 |  90500000 |  8212120 | 25711100
-      7 | 279330 |       31 |  21.5 | 260000000 | 213012 | 25011100
-      8 | 6803333 |       12 |    11 | 142500000 | 117012 | 22111000
-      9 | 33331840 |       31 |    12 | 160000000 | 150120 | 19011100
-     10 | 3780 |       4 |    2 | 220000 | 2790 | 21000
-     11 | 1760 |       3 |    1 |  77000 | 1030 | 18500
-     12 | 1520 |       3 |    2 | 128600 | 1250 | 21000
-     13 | 3000 |       3 |    2 | 130000 | 1760 | 37000
-     14 | 2170 |       2 |    3 | 138000 | 1550 | 13000
-     15 |  750 |       3 |  1.5 |  75000 | 1450 | 13000
-    \.
-
-    - Use the prediction function to evaluate the models. The predicted
-      results are in the prediction column and the actual data is in the
-      target column.
-    -- For the Gaussian model:
-    SELECT {schema_madlib}.svm_predict('houses_one_class_gaussian',
-                                       'houses_test',
-                                       'id',
-                                       'houses_pred_gaussian');
-    -- View the results of the prediction function:
-    SELECT * FROM houses_novelty_test JOIN houses_pred_gaussian USING (id) 
ORDER BY id;
-
-    """.format(**args)
 
     if not message:
         return summary
     elif message.lower() in ('usage', 'help', '?'):
         return usage
-    elif message.lower() == 'example':
-        return example_usage
     elif message.lower() == 'params':
         return params_usage
     elif message.lower() == 'gaussian':
@@ -849,84 +768,10 @@ def svm_help(schema_madlib, message, is_svc, **kwargs):
     gaussian_usage = get_svc_gaussian_usage_string()
     poly_usage = get_svc_poly_usage_string()
 
-    example_usage = """
-    ---------------------------------------------------------------------------
-                                  EXAMPLES
-    ---------------------------------------------------------------------------
-    - Create an input data set.
-
-    CREATE TABLE houses (id INT, tax INT, bedroom INT, bath FLOAT, price INT,
-                size INT, lot INT);
-    COPY houses FROM STDIN WITH DELIMITER '|';
-      1 |  590 |       2 |    1 |  50000 |  770 | 22100
-      2 | 1050 |       3 |    2 |  85000 | 1410 | 12000
-      3 |   20 |       3 |    1 |  22500 | 1060 |  3500
-      4 |  870 |       2 |    2 |  90000 | 1300 | 17500
-      5 | 1320 |       3 |    2 | 133000 | 1500 | 30000
-      6 | 1350 |       2 |    1 |  90500 |  820 | 25700
-      7 | 2790 |       3 |  2.5 | 260000 | 2130 | 25000
-      8 |  680 |       2 |    1 | 142500 | 1170 | 22000
-      9 | 1840 |       3 |    2 | 160000 | 1500 | 19000
-     10 | 3680 |       4 |    2 | 240000 | 2790 | 20000
-     11 | 1660 |       3 |    1 |  87000 | 1030 | 17500
-     12 | 1620 |       3 |    2 | 118600 | 1250 | 20000
-     13 | 3100 |       3 |    2 | 140000 | 1760 | 38000
-     14 | 2070 |       2 |    3 | 148000 | 1550 | 14000
-     15 |  650 |       3 |  1.5 |  65000 | 1450 | 12000
-    \.
-
-    - Train a classification model, using a linear model.
-
-    SELECT {schema_madlib}.svm_classification('houses',
-                                     'houses_svm',
-                                     'price < 100000',
-                                     'ARRAY[1, tax, bath, size]');
-
-    - Generate a nonlinear model using a Gaussian kernel. This time we
-      specify the initial step size and maximum number of iterations to run.
-      As part of the kernel parameter, we choose 10 as the dimension of the
-      space where we train SVM. A larger number will lead to a more powerful
-      model but run the risk of overfitting. As a result, the model will be a
-      10 dimensional vector, instead of 4 as in the case of linear model.
-
-    SELECT {schema_madlib}.svm_classification( 'houses',
-                                      'houses_svm_gaussian',
-                                      'price < 100000',
-                                      'ARRAY[1, tax, bath, size]',
-                                      'gaussian',
-                                      'n_components=10',
-                                      '',
-                                      'init_stepsize=1, max_iter=200');
-
-    - Use the prediction function to evaluate the models. The predicted
-      results are in the prediction column and the actual data is in the
-      target column.
-
-    -- For the linear model:
-    SELECT {schema_madlib}.svm_predict('houses_svm',
-                                       'houses',
-                                       'id',
-                                       'houses_pred');
-    SELECT *, price < 100000 AS target
-    FROM houses JOIN houses_pred
-    USING (id) ORDER BY id;
-
-    -- For the Gaussian model:
-    SELECT {schema_madlib}.svm_predict('houses_svm_gaussian',
-                                       'houses',
-                                       'id',
-                                       'houses_pred_gaussian');
-    SELECT *, price < 100000 AS target
-    FROM houses JOIN houses_pred_gaussian
-    USING (id) ORDER BY id;
-    """.format(**args)
-
     if not message:
         return summary
     elif message.lower() in ('usage', 'help', '?'):
         return usage
-    elif message.lower() in ('example', 'examples'):
-        return example_usage
     elif message.lower() == 'params':
         return params_usage
     elif message.lower() == 'gaussian':

http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in 
b/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in
index cbcd9b7..1238104 100644
--- a/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in
+++ b/src/ports/postgres/modules/utilities/minibatch_preprocessing.py_in
@@ -490,9 +490,6 @@ class MiniBatchDocumentation:
 
         For more details on function usage:
         SELECT {schema_madlib}.{method}('usage')
-
-        For a small example on using the function:
-        SELECT {schema_madlib}.{method}('example')
         """.format(**locals())
 
         usage = """
@@ -557,58 +554,11 @@ class MiniBatchDocumentation:
                              for normalization).
         """.format(**locals())
 
-        example = """
-        -- Create input table
-        CREATE TABLE iris_data(
-            id INTEGER,
-            attributes NUMERIC[],
-            class_text text,
-            class INTEGER,
-            state VARCHAR
-        );
-
-        COPY iris_data (attributes, class_text, class, state) FROM STDIN NULL 
'?' DELIMITER '|';
-        {4.4,3.2,1.3,0.2}|Iris_setosa|1|Alaska
-        {5.0,3.5,1.6,0.6}|Iris_setosa|1|Alaska
-        {5.1,3.8,1.9,0.4}|Iris_setosa|1|Alaska
-        {4.8,3.0,1.4,0.3}|Iris_setosa|1|Alaska
-        {5.1,3.8,1.6,0.2}|Iris_setosa|1|Alaska
-        {5.7,2.8,4.5,1.3}|Iris_versicolor|2|Alaska
-        {6.3,3.3,4.7,1.6}|Iris_versicolor|2|Alaska
-        {4.9,2.4,3.3,1.0}|Iris_versicolor|2|Alaska
-        {6.6,2.9,4.6,1.3}|Iris_versicolor|2|Alaska
-        {5.2,2.7,3.9,1.4}|Iris_versicolor|2|Alaska
-        {5.0,2.0,3.5,1.0}|Iris_versicolor|2|Alaska
-        {4.8,3.0,1.4,0.1}|Iris_setosa|1|Tennessee
-        {4.3,3.0,1.1,0.1}|Iris_setosa|1|Tennessee
-        {5.8,4.0,1.2,0.2}|Iris_setosa|1|Tennessee
-        {5.7,4.4,1.5,0.4}|Iris_setosa|1|Tennessee
-        {5.4,3.9,1.3,0.4}|Iris_setosa|1|Tennessee
-        {6.0,2.9,4.5,1.5}|Iris_versicolor|2|Tennessee
-        {5.7,2.6,3.5,1.0}|Iris_versicolor|2|Tennessee
-        {5.5,2.4,3.8,1.1}|Iris_versicolor|2|Tennessee
-        {5.5,2.4,3.7,1.0}|Iris_versicolor|2|Tennessee
-        {5.8,2.7,3.9,1.2}|Iris_versicolor|2|Tennessee
-        {6.0,2.7,5.1,1.6}|Iris_versicolor|2|Tennessee
-        \.
-
-        -- #TODO add description here
-        DROP TABLE IF EXISTS iris_data_batch, iris_data_batch_standardization, 
iris_data_batch_summary;
-        SELECT madlib.minibatch_preprocessor('iris_data', 'iris_data_batch', 
'class_text', 'attributes', 3);
-
-
-        -- #TODO add description here NULL buffer size
-        DROP TABLE IF EXISTS iris_data_batch, iris_data_batch_standardization, 
iris_data_batch_summary;
-        SELECT madlib.minibatch_preprocessor('iris_data', 'iris_data_batch', 
'class_text', 'attributes');
-
-        """
 
         if not message:
             return summary
         elif message.lower() in ('usage', 'help', '?'):
             return usage
-        elif message.lower() == 'example':
-            return example
         return """
             No such option. Use "SELECT 
{schema_madlib}.minibatch_preprocessor()"
             for help.

http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/utilities/path.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/path.py_in 
b/src/ports/postgres/modules/utilities/path.py_in
index 37457ff..acbaf8d 100644
--- a/src/ports/postgres/modules/utilities/path.py_in
+++ b/src/ports/postgres/modules/utilities/path.py_in
@@ -321,9 +321,6 @@ involved like aggregation.
 
 For more details on function usage:
     SELECT {schema_madlib}.path('usage');
-
-For a small example on using the function:
-    SELECT {schema_madlib}.path('example');
     """.format(schema_madlib=schema_madlib)
 
     usage_string = """
@@ -345,76 +342,10 @@ SELECT {schema_madlib}.path(
 );
     """.format(schema_madlib=schema_madlib)
 
-    example_string = """
----------------------------------------------------------------------------
-                                EXAMPLE
----------------------------------------------------------------------------
-- Create an input data set.
-
-DROP TABLE IF EXISTS eventlog, path_output, path_output_tuples;
-CREATE TABLE eventlog (event_timestamp TIMESTAMP,
-            user_id INT,
-            session_id INT,
-            page TEXT,
-            revenue FLOAT);
-INSERT INTO eventlog VALUES
-('04/15/2015 01:03:00', 100821, 100, 'LANDING', 0),
-('04/15/2015 01:04:00', 100821, 100, 'WINE', 0),
-('04/15/2015 01:05:00', 100821, 100, 'CHECKOUT', 39),
-('04/15/2015 02:06:00', 100821, 101, 'WINE', 0),
-('04/15/2015 02:09:00', 100821, 101, 'WINE', 0),
-('04/15/2015 01:15:00', 101121, 102, 'LANDING', 0),
-('04/15/2015 01:16:00', 101121, 102, 'WINE', 0),
-('04/15/2015 01:17:00', 101121, 102, 'CHECKOUT', 15),
-('04/15/2015 01:18:00', 101121, 102, 'LANDING', 0),
-('04/15/2015 01:19:00', 101121, 102, 'HELP', 0),
-('04/15/2015 01:21:00', 101121, 102, 'WINE', 0),
-('04/15/2015 01:22:00', 101121, 102, 'CHECKOUT', 23),
-('04/15/2015 02:15:00', 101331, 103, 'LANDING', 0),
-('04/15/2015 02:16:00', 101331, 103, 'WINE', 0),
-('04/15/2015 02:17:00', 101331, 103, 'HELP', 0),
-('04/15/2015 02:18:00', 101331, 103, 'WINE', 0),
-('04/15/2015 02:19:00', 101331, 103, 'CHECKOUT', 16),
-('04/15/2015 02:22:00', 101443, 104, 'BEER', 0),
-('04/15/2015 02:25:00', 101443, 104, 'CHECKOUT', 12),
-('04/15/2015 02:29:00', 101881, 105, 'LANDING', 0),
-('04/15/2015 02:30:00', 101881, 105, 'BEER', 0),
-('04/15/2015 01:05:00', 102201, 106, 'LANDING', 0),
-('04/15/2015 01:06:00', 102201, 106, 'HELP', 0),
-('04/15/2015 01:09:00', 102201, 106, 'LANDING', 0),
-('04/15/2015 02:15:00', 102201, 107, 'WINE', 0),
-('04/15/2015 02:16:00', 102201, 107, 'BEER', 0),
-('04/15/2015 02:17:00', 102201, 107, 'WINE', 0),
-('04/15/2015 02:18:00', 102871, 108, 'BEER', 0),
-('04/15/2015 02:19:00', 102871, 108, 'WINE', 0),
-('04/15/2015 02:22:00', 102871, 108, 'CHECKOUT', 21),
-('04/15/2015 02:25:00', 102871, 108, 'LANDING', 0),
-('04/15/2015 02:17:00', 103711, 109, 'BEER', 0),
-('04/15/2015 02:18:00', 103711, 109, 'LANDING', 0),
-('04/15/2015 02:19:00', 103711, 109, 'WINE', 0);
-
-- Calculate the revenue by checkout:
-
-SELECT {schema_madlib}.path(
-     'eventlog',                -- Name of input table
-     'path_output',             -- Table name to store path results
-     'session_id',              -- Partition input table by session
-     'event_timestamp ASC',     -- Order partitions in input table by time
-     'buy:=page=''CHECKOUT''',  -- Define a symbol for checkout events
-     '(buy)',                   -- Pattern search: purchase
-     'sum(revenue) as checkout_rev',    -- Aggregate:  sum revenue by checkout
-     TRUE                       -- Persist matches
-     );
-
-SELECT * FROM path_output ORDER BY session_id, match_id;
-    """.format(schema_madlib=schema_madlib)
-
     if not message:
         return summary_string
     elif message.lower() in ('usage', 'help', '?'):
         return usage_string
-    elif message.lower() in ('example', 'examples'):
-        return example_string
     else:
         return """
 No such option. Use "SELECT {schema_madlib}.path()" for help.

http://git-wip-us.apache.org/repos/asf/madlib/blob/24a11c1e/src/ports/postgres/modules/utilities/sessionize.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/utilities/sessionize.py_in 
b/src/ports/postgres/modules/utilities/sessionize.py_in
index ccd0a3e..278e1f8 100644
--- a/src/ports/postgres/modules/utilities/sessionize.py_in
+++ b/src/ports/postgres/modules/utilities/sessionize.py_in
@@ -131,15 +131,12 @@ def sessionize_help_message(schema_madlib, message, 
**kwargs):
 
-----------------------------------------------------------------------------------
 Functionality: Sessionize
 
-The MADlib sessionize function performs time-oriented session reconstruction 
on a 
-data set comprising a sequence of events. A defined period of inactivity 
indicates 
+The MADlib sessionize function performs time-oriented session reconstruction 
on a
+data set comprising a sequence of events. A defined period of inactivity 
indicates
 the end of one session and beginning of the next session.
 
 For more details on function usage:
     SELECT {schema_madlib}.sessionize('usage');
-
-For a small example on using the function:
-    SELECT {schema_madlib}.sessionize('example');
     """.format(schema_madlib=schema_madlib)
 
     usage_string = """
@@ -157,92 +154,17 @@ SELECT {schema_madlib}.sessionize(
                         -- a session
     'output_cols'       -- str, An optional valid postgres SELECT expression 
for the
                         -- output table/view (default *)
-    'create_view'       -- boolean, Optional parameter to specify if output is 
a 
+    'create_view'       -- boolean, Optional parameter to specify if output is 
a
                         -- view or materilized to a table (default True)
 );
     """.format(schema_madlib=schema_madlib)
 
-    example_string = """
------------------------------------------------------------------------------------
-                                    EXAMPLE
------------------------------------------------------------------------------------
-- Create an input data set:
-
-DROP TABLE IF EXISTS eventlog;
-CREATE TABLE eventlog (event_timestamp TIMESTAMP,
-            user_id INT,
-            page TEXT,
-            revenue FLOAT);
-INSERT INTO eventlog VALUES
-('04/15/2015 02:19:00', 101331, 'CHECKOUT', 16), 
-('04/15/2015 02:17:00', 202201, 'WINE', 0), 
-('04/15/2015 03:18:00', 202201, 'BEER', 0), 
-('04/15/2015 01:03:00', 100821, 'LANDING', 0), 
-('04/15/2015 01:04:00', 100821, 'WINE', 0), 
-('04/15/2015 01:05:00', 100821, 'CHECKOUT', 39), 
-('04/15/2015 02:06:00', 100821, 'WINE', 0), 
-('04/15/2015 02:09:00', 100821, 'WINE', 0), 
-('04/15/2015 02:15:00', 101331, 'LANDING', 0), 
-('04/15/2015 02:16:00', 101331, 'WINE', 0), 
-('04/15/2015 02:17:00', 101331, 'HELP', 0), 
-('04/15/2015 02:18:00', 101331, 'WINE', 0), 
-('04/15/2015 02:29:00', 201881, 'LANDING', 0), 
-('04/15/2015 02:30:00', 201881, 'BEER', 0), 
-('04/15/2015 01:05:00', 202201, 'LANDING', 0),
-('04/15/2015 01:06:00', 202201, 'HELP', 0), 
-('04/15/2015 01:09:00', 202201, 'LANDING', 0), 
-('04/15/2015 02:15:00', 202201, 'WINE', 0), 
-('04/15/2015 02:16:00', 202201, 'BEER', 0), 
-('04/15/2015 03:19:00', 202201, 'WINE', 0), 
-('04/15/2015 03:22:00', 202201, 'CHECKOUT', 21);
-
-- Sessionize the table for each user_id, and obtain only the user_id, with 
partition
-expression, event_timestamp and session_id:
-
-SELECT {schema_madlib}.sessionize(
- 'eventlog',            -- Name of input table
- 'sessionize_output',   -- Table name to store sessionized results
- 'user_id',             -- Partition input table by session
- 'event_timestamp',     -- Order partitions in input table by time
- '0:30:0'               -- Use 30 minute time out to define sessions
- );
-
-- View the output table containing the session IDs:
-
-SELECT * FROM sessionize_output;
-
-DROP VIEW sessionize_output;
-
-- Sessionize the table for each user_id, and materialize all columns from
-source table into an output table:
-
-SELECT {schema_madlib}.sessionize(
- 'eventlog',                 -- Name of input table
- 'sessionize_output',        -- Table name to store sessionized results
- 'user_id < 200000',         -- Partition input table by session
- 'event_timestamp',          -- Order partitions in input table by time
- '180',                      -- Use 3 minutes (180 seconds) to define sessions
- 'event_timestamp, user_id, user_id < 200000 AS "Department-A1"', 
-                             -- Select only the required columns, along with 
the
-                             -- session id column that is selected by default
- 'false'                     -- Materialize results into a table, and not a 
view
- );
-
-- View the output table containing the session IDs:
-
-SELECT * FROM sessionize_output WHERE "Department-A1"='TRUE';
-
-DROP TABLE sessionize_output;
-    """.format(schema_madlib=schema_madlib)
-
     help_string = summary_string
 
     if not message:
         return summary_string
     elif message.lower() in ('usage', 'help', '?'):
         return usage_string
-    elif message.lower() == 'example':
-        return example_string
     else:
         return """
 No such option. Use "SELECT {schema_madlib}.sessionize()" for help.

[1/3] madlib git commit: Documentation: Remove online examples from sql functions.

Reply via email to