[ 
https://issues.apache.org/jira/browse/MADLIB-1215?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16408162#comment-16408162
 ] 

Frank McQuillan commented on MADLIB-1215:
-----------------------------------------

{code}
-- Demo input for grouped PCA: two matrices stored one row vector per table row.
-- Matrix 'a' has three rows of length 3; matrix 'b' has three rows of length 5.
-- The grouping column (matrix_id) is text, the case reported in MADLIB-1215.
DROP TABLE IF EXISTS mat_group;
CREATE TABLE mat_group (
    id integer,
    row_vec double precision[],
    matrix_id text
);
-- Explicit column list so the inserts do not depend on the table's column order.
INSERT INTO mat_group (id, row_vec, matrix_id) VALUES
(1, '{1,2,3}', 'a'),
(2, '{2,1,2}', 'a'),
(3, '{3,2,1}', 'a'),
(4, '{1,2,3,4,5}', 'b'),
(5, '{2,5,2,4,1}', 'b'),
(6, '{5,4,3,2,1}', 'b');

-- Remove results of any earlier run so the example can be re-executed.
-- NOTE(review): the "_mean" table is presumably written by pca_train next to
-- the output table -- confirm against the MADlib PCA documentation.
DROP TABLE IF EXISTS result_table_group, result_table_group_mean;
SELECT madlib.pca_train('mat_group',             -- Source table
                        'result_table_group',    -- Output table
                        'id',                    -- Row id of source table
                         0.8,                    -- Proportion of variance
                        'matrix_id');            -- Grouping column
-- Name the output columns explicitly and order by group, then component row,
-- so the listing is deterministic.
SELECT
    row_id,
    principal_components,
    std_dev,
    proportion,
    matrix_id
FROM result_table_group
ORDER BY matrix_id, row_id;
{code}
produces
{code}
 row_id |                                      principal_components                                       |     std_dev      |    proportion     | matrix_id 
--------+-------------------------------------------------------------------------------------------------+------------------+-------------------+-----------
      1 | {0.707106781186547,5.55111512312578e-17,-0.707106781186547}                                     | 1.41421356237309 | 0.857142857142245 | a
      1 | {0.555378486712784,0.388303582074091,-0.0442457354870796,-0.255566375612852,-0.688115693174023} |  3.2315220311722 | 0.764102534485172 | b
      2 | {0.587384101786277,-0.485138064894743,0.311532046315152,-0.449458074050715,0.347212037159181}   |   1.795531127192 | 0.235897465516047 | b
(3 rows)
{code}

So this looks good now: pca_train runs successfully with a text grouping column.


> PCA error with text grouping column
> -----------------------------------
>
>                 Key: MADLIB-1215
>                 URL: https://issues.apache.org/jira/browse/MADLIB-1215
>             Project: Apache MADlib
>          Issue Type: Bug
>            Reporter: Rashmi Raghu
>            Assignee: Rahul Iyer
>            Priority: Minor
>             Fix For: v1.14
>
>
> {{The issue is that PCA train does not run when the grouping column is text 
> (have not tested other non-integer data types). See below for error 
> reproduced on a modified example from the docs.}}
> DROP TABLE IF EXISTS mat_group_text;
> CREATE TABLE mat_group_text (
>  id integer,
>  row_vec double precision[],
>  matrix_id_text text
> );
> INSERT INTO mat_group_text VALUES
> (1, '{1,2,3}', '1'),
> (2, '{2,1,2}', '1'),
> (3, '{3,2,1}', '1'),
> (4, '{1,2,3,4,5}', '2'),
> (5, '{2,5,2,4,1}', '2'),
> (6, '{5,4,3,2,1}', '2');
> DROP TABLE IF EXISTS result_table_group_text, result_table_group_text_mean;
> SELECT madlib.pca_train('mat_group_text', -- Source table
>  'result_table_group_text', -- Output table
>  'id', -- Row id of source table
>  0.8, -- Proportion of variance
>  'matrix_id_text'); -- Grouping column
> SELECT * FROM result_table_group_text ORDER BY matrix_id_text, row_id_text;
> -- NOTICE: table "result_table_group_text" does not exist, skipping
> -- NOTICE: table "result_table_group_text_mean" does not exist, skipping
> -- ERROR: plpy.SPIError: plpy.SPIError: operator does not exist: text = integer
> -- LINE 5: WHERE matrix_id_text=1
> -- ^
> -- HINT: No operator matches the given name and argument type(s). You might need to add explicit type casts.
> -- QUERY: 
> -- CREATE TABLE pg_temp.__madlib_temp_57228654_1520981521_47712361__group_0 AS
> -- SELECT ROW_NUMBER() OVER() AS row_id, row_vec
> -- FROM mat_group_text
> -- WHERE matrix_id_text=1
> -- 
> -- CONTEXT: Traceback (most recent call last):
> -- PL/Python function "pca_train", line 23, in <module>
> -- return pca.pca(**globals())
> -- PL/Python function "pca_train", line 87, in pca
> -- PL/Python function "pca_train", line 235, in pca_wrap
> -- PL/Python function "pca_train"
> -- ********** Error **********
> -- 
> -- ERROR: plpy.SPIError: plpy.SPIError: operator does not exist: text = integer
> -- SQL state: 42883
> -- Hint: No operator matches the given name and argument type(s). You might need to add explicit type casts.
> -- Context: Traceback (most recent call last):
> -- PL/Python function "pca_train", line 23, in <module>
> -- return pca.pca(**globals())
> -- PL/Python function "pca_train", line 87, in pca
> -- PL/Python function "pca_train", line 235, in pca_wrap
> -- PL/Python function "pca_train"



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to