Github user orhankislal commented on a diff in the pull request:
https://github.com/apache/incubator-madlib/pull/87#discussion_r97140449
--- Diff: src/ports/postgres/modules/assoc_rules/assoc_rules.py_in ---
@@ -504,3 +510,120 @@ def assoc_rules(madlib_schema, support, confidence,
tid_col,
total_rules,
time.time() - begin_func_exec
);
+
+def assoc_rules_help_message(schema_madlib, message=None, **kwargs):
+ """
+ Given a help string, provide usage information
+
+ Args:
+ @param schema_madlib Name of the MADlib schema
+ @param message Helper message to print
+
+ Returns:
+ None
+ """
+ if message is not None and \
+ message.lower() in ("usage", "help", "?"):
+ return """
+-----------------------------------------------------------------------
+ USAGE
+-----------------------------------------------------------------------
+SELECT {schema_madlib}.assoc_rules(
+ support, -- FLOAT8, minimum level of support needed for
each itemset to be included in result
+ confidence, -- FLOAT8, minimum level of confidence needed for
each rule to be included in result
+ tid_col, -- TEXT, name of the column storing the
transaction ids
+ item_col, -- TEXT, name of the column storing the products
+ input_table, -- TEXT, name of the table containing the input
data
+ output_schema, -- TEXT, name of the schema where the final
results will be stored.
+ The schema must be created before calling
the function. Alternatively, use
+ <tt>NULL</tt> to output to the current
schema.
+ verbose, -- BOOLEAN, (optional, default: False) determines
if details are printed for each
+ iteration as the algorithm progresses
+ max_itemset_size -- INTEGER, (optional, default: itemsets of all
sizes) determines the maximum size of frequent
+ itemsets allowed that are used for
generating association rules. Value less
+ than 2 throws an error.
+);
+-------------------------------------------------------------------------
+ OUTPUT TABLES
+-------------------------------------------------------------------------
+The output table "assoc_rules" in the "output_schema" contains a unique
rule of the form "If X, then Y
+(i.e., X => Y)" in each row. X and Y are non-empty itemsets, called the
antecedent and consequent, or
+the left-hand-side (LHS) and right-hand-side (LHS), of the rule
respectively.
+
+in each row, with the following columns:
+ ruleid, -- INTEGER, row number
+ pre, -- TEXT, specifies the antecedent, or the LHS of the rule
+ post, -- DOUBLE, specifies the consequent, or the RHS of the rule
+ support, -- DOUBLE, support of the frequent itemset X,Y
+ count, -- INTEGER, number of transactions in the input table that
contain X,Y
+ confidence, -- DOUBLE, the ratio of number of transactions that
contain X,Y to the number of transactions
+ that contain X
+ lift, -- DOUBLE, the ratio of observed support of X,Y to the
expected support of X,Y, assuming X and
+ Y are independent.
+ conviction -- DOUBLE, the ratio of expected support of X occurring
without Y assuming X and Y are
+ independent, to the observed support of X occuring
without Y
+ """.format(schema_madlib=schema_madlib)
+ else:
+ if message.lower() in ("example", "examples"):
+ return """
+------------------------------------------------------------------------
+ EXAMPLES
+------------------------------------------------------------------------
+DROP TABLE IF EXISTS test_data;
+CREATE TABLE test_data (
+ trans_id INT,
+ product TEXT
+);
+INSERT INTO test_data VALUES (1, 'beer');
+INSERT INTO test_data VALUES (1, 'diapers');
+INSERT INTO test_data VALUES (1, 'chips');
+INSERT INTO test_data VALUES (2, 'beer');
+INSERT INTO test_data VALUES (2, 'diapers');
+INSERT INTO test_data VALUES (3, 'beer');
+INSERT INTO test_data VALUES (3, 'diapers');
+INSERT INTO test_data VALUES (4, 'beer');
+INSERT INTO test_data VALUES (4, 'chips');
+INSERT INTO test_data VALUES (5, 'beer');
+INSERT INTO test_data VALUES (6, 'beer');
+INSERT INTO test_data VALUES (6, 'diapers');
+INSERT INTO test_data VALUES (6, 'chips');
+INSERT INTO test_data VALUES (7, 'beer');
+INSERT INTO test_data VALUES (7, 'diapers');
+\.
--- End diff --
I don't think this is needed with INSERT INTO commands.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---