GitHub user fmcquillan99 commented on the issue: https://github.com/apache/madlib/pull/291 I am wondering why the order of the input columns determines whether the result is cast to varchar or to text. For this data set: ``` DROP TABLE IF EXISTS golf CASCADE; CREATE TABLE golf ( id int, "OUTLOOK" varchar, temperature smallint, humidity real, "Temp_Humidity" double precision[], clouds_airquality text[], windy boolean, class text, observation_weight double precision ); INSERT INTO golf VALUES (1,'sunny', 85, 85, ARRAY[85, 85],ARRAY['none', 'unhealthy'], 'false','Don''t Play', 5.0), (2, 'sunny', 80, 90, ARRAY[80, 90], ARRAY['none', 'moderate'], 'true', 'Don''t Play', 5.0), (3, 'overcast', 83, 78, ARRAY[83, 78], ARRAY['low', 'moderate'], 'false', 'Play', 1.5), (4, 'rain', 70, 96, ARRAY[70, 96], ARRAY['low', 'moderate'], 'false', 'Play', 1.0), (5, 'rain', 68, 80, ARRAY[68, 80], ARRAY['medium', 'good'], 'false', 'Play', 1.0), (6, 'rain', 65, 70, ARRAY[65, 70], ARRAY['low', 'unhealthy'], 'true', 'Don''t Play', 1.0), (7, 'overcast', 64, 65, ARRAY[64, 65], ARRAY['medium', 'moderate'], 'true', 'Play', 1.5), (8, 'sunny', 72, 95, ARRAY[72, 95], ARRAY['high', 'unhealthy'], 'false', 'Don''t Play', 5.0), (9, 'sunny', 69, 70, ARRAY[69, 70], ARRAY['high', 'good'], 'false', 'Play', 5.0), (10, 'rain', 75, 80, ARRAY[75, 80], ARRAY['medium', 'good'], 'false', 'Play', 1.0), (11, 'sunny', 75, 70, ARRAY[75, 70], ARRAY['none', 'good'], 'true', 'Play', 5.0), (12, 'overcast', 72, 90, ARRAY[72, 90], ARRAY['medium', 'moderate'], 'true', 'Play', 1.5), (13, 'overcast', 81, 75, ARRAY[81, 75], ARRAY['medium', 'moderate'], 'false', 'Play', 1.5), (14, 'rain', 71, 80, ARRAY[71, 80], ARRAY['low', 'unhealthy'], 'true', 'Don''t Play', 1.0); ``` (1) ``` DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary; SELECT madlib.cols2vec( 'golf', 'cols2vec_result', '"OUTLOOK", class' ); ``` produces a varchar array: ``` select * from INFORMATION_SCHEMA.COLUMNS where table_name = 'out99'; -[ RECORD 1 ]------------+------------------ table_catalog | madlib 
table_schema | public table_name | out99 column_name | f2 ordinal_position | 2 column_default | is_nullable | YES data_type | character varying character_maximum_length | character_octet_length | 1073741824 numeric_precision | numeric_precision_radix | numeric_scale | datetime_precision | interval_type | interval_precision | character_set_catalog | character_set_schema | character_set_name | collation_catalog | collation_schema | collation_name | domain_catalog | domain_schema | domain_name | udt_catalog | madlib udt_schema | pg_catalog udt_name | varchar scope_catalog | scope_schema | scope_name | maximum_cardinality | dtd_identifier | 2 is_self_referencing | NO is_identity | NO identity_generation | identity_start | identity_increment | identity_maximum | identity_minimum | identity_cycle | is_generated | NEVER generation_expression | is_updatable | YES -[ RECORD 2 ]------------+------------------ table_catalog | madlib table_schema | public table_name | out99 column_name | f1 ordinal_position | 1 column_default | is_nullable | YES data_type | character varying character_maximum_length | character_octet_length | 1073741824 numeric_precision | numeric_precision_radix | numeric_scale | datetime_precision | interval_type | interval_precision | character_set_catalog | character_set_schema | character_set_name | collation_catalog | collation_schema | collation_name | domain_catalog | domain_schema | domain_name | udt_catalog | madlib udt_schema | pg_catalog udt_name | varchar scope_catalog | scope_schema | scope_name | maximum_cardinality | dtd_identifier | 1 is_self_referencing | NO is_identity | NO identity_generation | identity_start | identity_increment | identity_maximum | identity_minimum | identity_cycle | is_generated | NEVER generation_expression | is_updatable | YES ``` (2) ``` DROP TABLE IF EXISTS cols2vec_result, cols2vec_result_summary; SELECT madlib.cols2vec( 'golf', 'cols2vec_result', 'class, "OUTLOOK"' ); ``` produces a text array: ``` select * from 
INFORMATION_SCHEMA.COLUMNS where table_name = 'out99'; -[ RECORD 1 ]------------+----------- table_catalog | madlib table_schema | public table_name | out99 column_name | f2 ordinal_position | 2 column_default | is_nullable | YES data_type | text character_maximum_length | character_octet_length | 1073741824 numeric_precision | numeric_precision_radix | numeric_scale | datetime_precision | interval_type | interval_precision | character_set_catalog | character_set_schema | character_set_name | collation_catalog | collation_schema | collation_name | domain_catalog | domain_schema | domain_name | udt_catalog | madlib udt_schema | pg_catalog udt_name | text scope_catalog | scope_schema | scope_name | maximum_cardinality | dtd_identifier | 2 is_self_referencing | NO is_identity | NO identity_generation | identity_start | identity_increment | identity_maximum | identity_minimum | identity_cycle | is_generated | NEVER generation_expression | is_updatable | YES -[ RECORD 2 ]------------+----------- table_catalog | madlib table_schema | public table_name | out99 column_name | f1 ordinal_position | 1 column_default | is_nullable | YES data_type | text character_maximum_length | character_octet_length | 1073741824 numeric_precision | numeric_precision_radix | numeric_scale | datetime_precision | interval_type | interval_precision | character_set_catalog | character_set_schema | character_set_name | collation_catalog | collation_schema | collation_name | domain_catalog | domain_schema | domain_name | udt_catalog | madlib udt_schema | pg_catalog udt_name | text scope_catalog | scope_schema | scope_name | maximum_cardinality | dtd_identifier | 1 is_self_referencing | NO is_identity | NO identity_generation | identity_start | identity_increment | identity_maximum | identity_minimum | identity_cycle | is_generated | NEVER generation_expression | is_updatable | YES ``` Why is that?
---