[ 
https://issues.apache.org/jira/browse/MADLIB-1327?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16891487#comment-16891487
 ] 

Frank McQuillan commented on MADLIB-1327:
-----------------------------------------

Code for testing:

{code}
-- Start from a clean slate so the script can be re-run without errors.
DROP TABLE IF EXISTS test_data;

-- Market-basket fixture: one row per (transaction, item) pair.
CREATE TABLE test_data (
    trans_id INT,   -- transaction (basket) identifier
    product  TEXT   -- name of an item that appears in that transaction
);
-- Fixture rows for test_data.
-- The same 15-row basket layout (transactions 1-7) is loaded four times, each
-- time with a different spelling of the item names, so every transaction ends
-- up holding four times as many distinct items as the base pattern.

-- Variant 1: 'beer' / 'diapers' / 'chips'
INSERT INTO test_data (trans_id, product) VALUES
    (1, 'beer'),
    (1, 'diapers'),
    (1, 'chips'),
    (2, 'beer'),
    (2, 'diapers'),
    (3, 'beer'),
    (3, 'diapers'),
    (4, 'beer'),
    (4, 'chips'),
    (5, 'beer'),
    (6, 'beer'),
    (6, 'diapers'),
    (6, 'chips'),
    (7, 'beer'),
    (7, 'diapers');

-- Variant 2: 'bee' / 'diaper' / 'chip'
INSERT INTO test_data (trans_id, product) VALUES
    (1, 'bee'),
    (1, 'diaper'),
    (1, 'chip'),
    (2, 'bee'),
    (2, 'diaper'),
    (3, 'bee'),
    (3, 'diaper'),
    (4, 'bee'),
    (4, 'chip'),
    (5, 'bee'),
    (6, 'bee'),
    (6, 'diaper'),
    (6, 'chip'),
    (7, 'bee'),
    (7, 'diaper');

-- Variant 3: 'be' / 'diape' / 'chi'
INSERT INTO test_data (trans_id, product) VALUES
    (1, 'be'),
    (1, 'diape'),
    (1, 'chi'),
    (2, 'be'),
    (2, 'diape'),
    (3, 'be'),
    (3, 'diape'),
    (4, 'be'),
    (4, 'chi'),
    (5, 'be'),
    (6, 'be'),
    (6, 'diape'),
    (6, 'chi'),
    (7, 'be'),
    (7, 'diape');

-- Variant 4: 'bez' / 'diapez' / 'chiz'
INSERT INTO test_data (trans_id, product) VALUES
    (1, 'bez'),
    (1, 'diapez'),
    (1, 'chiz'),
    (2, 'bez'),
    (2, 'diapez'),
    (3, 'bez'),
    (3, 'diapez'),
    (4, 'bez'),
    (4, 'chiz'),
    (5, 'bez'),
    (6, 'bez'),
    (6, 'diapez'),
    (6, 'chiz'),
    (7, 'bez'),
    (7, 'diapez');


-- Generate association rules from test_data, passing 1 as the max RHS size
-- (the option this test exercises). The three NULL arguments are left for
-- MADlib to fill in; only the itemset-size default is stated here.
SELECT *
FROM madlib.assoc_rules(
    0.25,          -- Support
    0.5,           -- Confidence
    'trans_id',    -- Transaction id col
    'product',     -- Product col
    'test_data',   -- Input data
    NULL,          -- Output schema
    FALSE,         -- Verbose output
    NULL,          -- Max itemset size (default 10)
    NULL,          -- Max LHS
    1              -- Max RHS
);

-- Summary statistics over the assoc_rules table (presumably the output of the
-- madlib.assoc_rules call in this script -- confirm it lives in the current
-- schema). pre and post are array columns; each query below measures their
-- lengths along dimension 1.

-- Largest rule, counting pre and post items together.
SELECT MAX(array_length(array_cat(pre, post), 1))
FROM assoc_rules;

-- Largest pre side.
SELECT MAX(array_length(pre, 1))
FROM assoc_rules;

-- Average pre side size.
SELECT AVG(array_length(pre, 1))
FROM assoc_rules;

-- Number of rules whose pre side has exactly one item.
SELECT COUNT(*)
FROM assoc_rules
WHERE array_length(pre, 1) = 1;

-- Average rule size, counting pre and post items together.
SELECT AVG(array_length(array_cat(pre, post), 1))
FROM assoc_rules;

-- Largest post side.
SELECT MAX(array_length(post, 1))
FROM assoc_rules;

-- Average post side size.
SELECT AVG(array_length(post, 1))
FROM assoc_rules;

-- Number of rules whose post side has exactly one item.
SELECT COUNT(*)
FROM assoc_rules
WHERE array_length(post, 1) = 1;
{code}





> Add option to set number of posterior in association rules
> ----------------------------------------------------------
>
>                 Key: MADLIB-1327
>                 URL: https://issues.apache.org/jira/browse/MADLIB-1327
>             Project: Apache MADlib
>          Issue Type: Improvement
>          Components: Module: Association Rules
>            Reporter: Frank McQuillan
>            Assignee: Orhan Kislal
>            Priority: Major
>             Fix For: v1.17
>
>         Attachments: Association_rules_informal_scale_test_-_Google_Sheets.pdf
>
>
> The goal is to speed up rule generation. The arules package in R limits the 
> posterior to 1 item.
> In MADlib, the goal of the feature is to allow users to set the number of 
> posterior items to 1 or 2 or ..., with the goal of reducing run-time.
> Does it make sense to specify the number of anterior items too?



--
This message was sent by Atlassian JIRA
(v7.6.14#76016)

Reply via email to