[
https://issues.apache.org/jira/browse/MADLIB-1018?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Frank McQuillan updated MADLIB-1018:
------------------------------------
Description:
For k-means, normally you should be able to do array[col1, col2…] for the 2nd
parameter, but that does not work. This JIRA is to be able to support
array[col1, col2…].
{code}
expr_point
TEXT. The name of the column with point coordinates.
{code}
{code}
SELECT madlib.kmeans_random('customers_train',
'array[creditamount, accountbalance]',
3
);
{code}
produces
{code}
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
<ipython-input-50-0b939dd162ef> in <module>()
----> 1 get_ipython().run_cell_magic(u'sql', u'', u"\nSELECT
madlib.kmeans_random('customers_train',\n 'array[creditamount,
accountbalance]',\n 3\n );\n")
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc
in run_cell_magic(self, magic_name, line, cell)
2291 magic_arg_s = self.var_expand(line, stack_depth)
2292 with self.builtin_trap:
-> 2293 result = fn(magic_arg_s, cell)
2294 return result
2295
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sql/magic.pyc in
execute(self, line, cell, local_ns)
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc
in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sql/magic.pyc in
execute(self, line, cell, local_ns)
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc
in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sql/magic.pyc in
execute(self, line, cell, local_ns)
78 return self._persist_dataframe(parsed['sql'], conn, user_ns)
79 try:
---> 80 result = sql.run.run(conn, parsed['sql'], self, user_ns)
81 return result
82 except (ProgrammingError, OperationalError) as e:
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sql/run.pyc in run(conn,
sql, config, user_namespace)
270 raise Exception("ipython_sql does not support
transactions")
271 txt = sqlalchemy.sql.text(statement)
--> 272 result = conn.session.execute(txt, user_namespace)
273 try:
274 conn.session.execute('commit')
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
in execute(self, object, *multiparams, **params)
912 type(object))
913 else:
--> 914 return meth(self, multiparams, params)
915
916 def _execute_function(self, func, multiparams, params):
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/sql/elements.pyc
in _execute_on_connection(self, connection, multiparams, params)
321
322 def _execute_on_connection(self, connection, multiparams, params):
--> 323 return connection._execute_clauseelement(self, multiparams,
params)
324
325 def unique_params(self, *optionaldict, **kwargs):
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
in _execute_clauseelement(self, elem, multiparams, params)
1008 compiled_sql,
1009 distilled_params,
-> 1010 compiled_sql, distilled_params
1011 )
1012 if self._has_events or self.engine._has_events:
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
in _execute_context(self, dialect, constructor, statement, parameters, *args)
1144 parameters,
1145 cursor,
-> 1146 context)
1147
1148 if self._has_events or self.engine._has_events:
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1339 util.raise_from_cause(
1340 sqlalchemy_exception,
-> 1341 exc_info
1342 )
1343 else:
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/util/compat.pyc
in raise_from_cause(exception, exc_info)
197 exc_info = sys.exc_info()
198 exc_type, exc_value, exc_tb = exc_info
--> 199 reraise(type(exception), exception, tb=exc_tb)
200
201 if py3k:
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
in _execute_context(self, dialect, constructor, statement, parameters, *args)
1137 statement,
1138 parameters,
-> 1139 context)
1140 except Exception as e:
1141 self._handle_dbapi_exception(
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/default.pyc
in do_execute(self, cursor, statement, parameters, context)
448
449 def do_execute(self, cursor, statement, parameters, context=None):
--> 450 cursor.execute(statement, parameters)
451
452 def do_execute_no_params(self, cursor, statement, context=None):
InternalError: (psycopg2.InternalError) plpy.SPIError: syntax error at or near
"," (plpython.c:4651)
LINE 44: ... _src.array[creditamount, accountb...
^
QUERY:
SELECT
1 AS _iteration,
madlib.array_to_1d((_state).centroids) AS centroids,
(_state).old_centroid_ids,
(_state).objective_fn,
(_state).frac_reassigned
FROM
(
SELECT (
SELECT
CAST((
madlib.matrix_agg(
_centroid::FLOAT8[]
ORDER BY _new_centroid_id),
array_agg(_new_centroid_id ORDER BY _new_centroid_id),
sum(_objective_fn),
CAST(sum(_num_reassigned) AS DOUBLE PRECISION)
/ sum(_num_points)
) AS madlib.kmeans_state)
FROM (
SELECT
(_new_centroid).column_id AS _new_centroid_id,
sum((_new_centroid).distance) AS _objective_fn,
count(*) AS _num_points,
sum(
CAST(
coalesce(
(CAST(
(SELECT (CAST ((madlib.array_to_2d($1),
$2, $3, $4)
AS madlib.kmeans_state)).old_centroid_ids) AS
INTEGER[]
))[(_new_centroid).column_id + 1] !=
_old_centroid_id,
TRUE
)
AS INTEGER
)
) AS _num_reassigned,
madlib.avg(_point::FLOAT8[]) AS _centroid
FROM (
SELECT
-- PostgreSQL/Greenplum tuning:
-- VOLATILE function as optimization fence
madlib.noop(),
_src.array[creditamount, accountbalance] AS _point,
madlib.closest_column(
(SELECT (CAST ((madlib.array_to_2d($1), $2, $3,
$4)
AS madlib.kmeans_state)).centroids)
, _src.array[creditamount,
accountbalance]::FLOAT8[]
, 'madlib.squared_dist_norm2'
)
AS _new_centroid,
(madlib.closest_column((SELECT (CAST
((madlib.array_to_2d($5), $6, $7, $8)
AS madlib.kmeans_state)).centroids)
, _src.array[creditamount,
accountbalance]::FLOAT8[]
, 'madlib.squared_dist_norm2'
)
).column_id
AS _old_centroid_id
FROM customers_train AS _src
WHERE
abs(coalesce(madlib.svec_elsum(array[creditamount, accountbalance]),
'Infinity'::FLOAT8)) < 'Infinity'::FLOAT8
AND NOT
madlib.array_contains_null(_src.array[creditamount, accountbalance]::FLOAT8[])
) AS _points_with_assignments
GROUP BY (_new_centroid).column_id
) AS _new_centroids
) AS _state
) q
CONTEXT: Traceback (most recent call last):
PL/Python function "internal_compute_kmeans", line 22, in <module>
return kmeans.compute_kmeans(**globals())
PL/Python function "internal_compute_kmeans", line 332, in compute_kmeans
PL/Python function "internal_compute_kmeans", line 227, in update
PL/Python function "internal_compute_kmeans"
SQL statement "SELECT madlib.internal_compute_kmeans( '_madlib_kmeans_args',
'_madlib_kmeans_state', textin(regclassout( $1 )), $2 , textin(regprocout( $3
)))"
PL/pgSQL function "kmeans" line 103 at assignment
SQL statement "SELECT madlib.kmeans( $1 , $2 , madlib.kmeans_random_seeding(
$1 , $2 , $3 ), 'madlib.squared_dist_norm2', 'madlib.avg', 20, 0.001)"
PL/pgSQL function "kmeans_random" line 4 at assignment
[SQL: "SELECT madlib.kmeans_random('customers_train',\n
'array[creditamount, accountbalance]',\n 3\n );"]
{code}
The workaround is to create a view:
{code}
CREATE VIEW cluster_params AS (SELECT *, array[creditamount, accountbalance] as
p1 FROM customers_train);
SELECT madlib.kmeans_random('cluster_params',
'p1',
3
);
{code}
produces
{code}
("{{8619.6635514,3490.145919},{2343.72082019,6004.36435331},{2191.06698565,1908.8522488}}",9660868534.24,0.001,11)
{code}
was:
For k-means, normally you should be able to do array[col1, col2…] for the 2nd
parameter, but that does not work. This JIRA is to be able to support
array[col1, col2…].
{code}
expr_point
TEXT. The name of the column with point coordinates.
{code}
{code}
SELECT madlib.kmeans_random('customers_train',
'array[creditamount, accountbalance]',
3
);
{code}
produces
{code}
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
<ipython-input-50-0b939dd162ef> in <module>()
----> 1 get_ipython().run_cell_magic(u'sql', u'', u"\nSELECT
madlib.kmeans_random('customers_train',\n 'array[creditamount,
accountbalance]',\n 3\n );\n")
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc
in run_cell_magic(self, magic_name, line, cell)
2291 magic_arg_s = self.var_expand(line, stack_depth)
2292 with self.builtin_trap:
-> 2293 result = fn(magic_arg_s, cell)
2294 return result
2295
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sql/magic.pyc in
execute(self, line, cell, local_ns)
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc
in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sql/magic.pyc in
execute(self, line, cell, local_ns)
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc
in <lambda>(f, *a, **k)
191 # but it's overkill for just that one bit of state.
192 def magic_deco(arg):
--> 193 call = lambda f, *a, **k: f(*a, **k)
194
195 if callable(arg):
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sql/magic.pyc in
execute(self, line, cell, local_ns)
78 return self._persist_dataframe(parsed['sql'], conn, user_ns)
79 try:
---> 80 result = sql.run.run(conn, parsed['sql'], self, user_ns)
81 return result
82 except (ProgrammingError, OperationalError) as e:
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sql/run.pyc in run(conn,
sql, config, user_namespace)
270 raise Exception("ipython_sql does not support
transactions")
271 txt = sqlalchemy.sql.text(statement)
--> 272 result = conn.session.execute(txt, user_namespace)
273 try:
274 conn.session.execute('commit')
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
in execute(self, object, *multiparams, **params)
912 type(object))
913 else:
--> 914 return meth(self, multiparams, params)
915
916 def _execute_function(self, func, multiparams, params):
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/sql/elements.pyc
in _execute_on_connection(self, connection, multiparams, params)
321
322 def _execute_on_connection(self, connection, multiparams, params):
--> 323 return connection._execute_clauseelement(self, multiparams,
params)
324
325 def unique_params(self, *optionaldict, **kwargs):
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
in _execute_clauseelement(self, elem, multiparams, params)
1008 compiled_sql,
1009 distilled_params,
-> 1010 compiled_sql, distilled_params
1011 )
1012 if self._has_events or self.engine._has_events:
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
in _execute_context(self, dialect, constructor, statement, parameters, *args)
1144 parameters,
1145 cursor,
-> 1146 context)
1147
1148 if self._has_events or self.engine._has_events:
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1339 util.raise_from_cause(
1340 sqlalchemy_exception,
-> 1341 exc_info
1342 )
1343 else:
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/util/compat.pyc
in raise_from_cause(exception, exc_info)
197 exc_info = sys.exc_info()
198 exc_type, exc_value, exc_tb = exc_info
--> 199 reraise(type(exception), exception, tb=exc_tb)
200
201 if py3k:
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
in _execute_context(self, dialect, constructor, statement, parameters, *args)
1137 statement,
1138 parameters,
-> 1139 context)
1140 except Exception as e:
1141 self._handle_dbapi_exception(
/Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/default.pyc
in do_execute(self, cursor, statement, parameters, context)
448
449 def do_execute(self, cursor, statement, parameters, context=None):
--> 450 cursor.execute(statement, parameters)
451
452 def do_execute_no_params(self, cursor, statement, context=None):
InternalError: (psycopg2.InternalError) plpy.SPIError: syntax error at or near
"," (plpython.c:4651)
LINE 44: ... _src.array[creditamount, accountb...
^
QUERY:
SELECT
1 AS _iteration,
madlib.array_to_1d((_state).centroids) AS centroids,
(_state).old_centroid_ids,
(_state).objective_fn,
(_state).frac_reassigned
FROM
(
SELECT (
SELECT
CAST((
madlib.matrix_agg(
_centroid::FLOAT8[]
ORDER BY _new_centroid_id),
array_agg(_new_centroid_id ORDER BY _new_centroid_id),
sum(_objective_fn),
CAST(sum(_num_reassigned) AS DOUBLE PRECISION)
/ sum(_num_points)
) AS madlib.kmeans_state)
FROM (
SELECT
(_new_centroid).column_id AS _new_centroid_id,
sum((_new_centroid).distance) AS _objective_fn,
count(*) AS _num_points,
sum(
CAST(
coalesce(
(CAST(
(SELECT (CAST ((madlib.array_to_2d($1),
$2, $3, $4)
AS madlib.kmeans_state)).old_centroid_ids) AS
INTEGER[]
))[(_new_centroid).column_id + 1] !=
_old_centroid_id,
TRUE
)
AS INTEGER
)
) AS _num_reassigned,
madlib.avg(_point::FLOAT8[]) AS _centroid
FROM (
SELECT
-- PostgreSQL/Greenplum tuning:
-- VOLATILE function as optimization fence
madlib.noop(),
_src.array[creditamount, accountbalance] AS _point,
madlib.closest_column(
(SELECT (CAST ((madlib.array_to_2d($1), $2, $3,
$4)
AS madlib.kmeans_state)).centroids)
, _src.array[creditamount,
accountbalance]::FLOAT8[]
, 'madlib.squared_dist_norm2'
)
AS _new_centroid,
(madlib.closest_column((SELECT (CAST
((madlib.array_to_2d($5), $6, $7, $8)
AS madlib.kmeans_state)).centroids)
, _src.array[creditamount,
accountbalance]::FLOAT8[]
, 'madlib.squared_dist_norm2'
)
).column_id
AS _old_centroid_id
FROM customers_train AS _src
WHERE
abs(coalesce(madlib.svec_elsum(array[creditamount, accountbalance]),
'Infinity'::FLOAT8)) < 'Infinity'::FLOAT8
AND NOT
madlib.array_contains_null(_src.array[creditamount, accountbalance]::FLOAT8[])
) AS _points_with_assignments
GROUP BY (_new_centroid).column_id
) AS _new_centroids
) AS _state
) q
CONTEXT: Traceback (most recent call last):
PL/Python function "internal_compute_kmeans", line 22, in <module>
return kmeans.compute_kmeans(**globals())
PL/Python function "internal_compute_kmeans", line 332, in compute_kmeans
PL/Python function "internal_compute_kmeans", line 227, in update
PL/Python function "internal_compute_kmeans"
SQL statement "SELECT madlib.internal_compute_kmeans( '_madlib_kmeans_args',
'_madlib_kmeans_state', textin(regclassout( $1 )), $2 , textin(regprocout( $3
)))"
PL/pgSQL function "kmeans" line 103 at assignment
SQL statement "SELECT madlib.kmeans( $1 , $2 , madlib.kmeans_random_seeding(
$1 , $2 , $3 ), 'madlib.squared_dist_norm2', 'madlib.avg', 20, 0.001)"
PL/pgSQL function "kmeans_random" line 4 at assignment
[SQL: "SELECT madlib.kmeans_random('customers_train',\n
'array[creditamount, accountbalance]',\n 3\n );"]
{code}
The workaround is to create a view:
{code}
CREATE VIEW cluster_params AS (SELECT *, array[creditamount, accountbalance] as
p1 FROM customers_train);
SELECT madlib.kmeans_random('cluster_params',
'p1',
3
);
{code}
{code}
("{{8619.6635514,3490.145919},{2343.72082019,6004.36435331},{2191.06698565,1908.8522488}}",9660868534.24,0.001,11)
{code}
> Fix K-means support for array input for data points
> ---------------------------------------------------
>
> Key: MADLIB-1018
> URL: https://issues.apache.org/jira/browse/MADLIB-1018
> Project: Apache MADlib
> Issue Type: Bug
> Components: Module: k-Means Clustering
> Reporter: Frank McQuillan
> Priority: Minor
> Fix For: v1.9.2
>
>
> For k-means, normally you should be able to do array[col1, col2…] for the 2nd
> parameter, but that does not work. This JIRA is to be able to support
> array[col1, col2…].
> {code}
> expr_point
> TEXT. The name of the column with point coordinates.
> {code}
> {code}
> SELECT madlib.kmeans_random('customers_train',
> 'array[creditamount, accountbalance]',
> 3
> );
> {code}
> produces
> {code}
> ---------------------------------------------------------------------------
> InternalError Traceback (most recent call last)
> <ipython-input-50-0b939dd162ef> in <module>()
> ----> 1 get_ipython().run_cell_magic(u'sql', u'', u"\nSELECT
> madlib.kmeans_random('customers_train',\n 'array[creditamount,
> accountbalance]',\n 3\n );\n")
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.pyc
> in run_cell_magic(self, magic_name, line, cell)
> 2291 magic_arg_s = self.var_expand(line, stack_depth)
> 2292 with self.builtin_trap:
> -> 2293 result = fn(magic_arg_s, cell)
> 2294 return result
> 2295
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/sql/magic.pyc in
> execute(self, line, cell, local_ns)
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc
> in <lambda>(f, *a, **k)
> 191 # but it's overkill for just that one bit of state.
> 192 def magic_deco(arg):
> --> 193 call = lambda f, *a, **k: f(*a, **k)
> 194
> 195 if callable(arg):
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/sql/magic.pyc in
> execute(self, line, cell, local_ns)
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/IPython/core/magic.pyc
> in <lambda>(f, *a, **k)
> 191 # but it's overkill for just that one bit of state.
> 192 def magic_deco(arg):
> --> 193 call = lambda f, *a, **k: f(*a, **k)
> 194
> 195 if callable(arg):
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/sql/magic.pyc in
> execute(self, line, cell, local_ns)
> 78 return self._persist_dataframe(parsed['sql'], conn,
> user_ns)
> 79 try:
> ---> 80 result = sql.run.run(conn, parsed['sql'], self, user_ns)
> 81 return result
> 82 except (ProgrammingError, OperationalError) as e:
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/sql/run.pyc in
> run(conn, sql, config, user_namespace)
> 270 raise Exception("ipython_sql does not support
> transactions")
> 271 txt = sqlalchemy.sql.text(statement)
> --> 272 result = conn.session.execute(txt, user_namespace)
> 273 try:
> 274 conn.session.execute('commit')
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
> in execute(self, object, *multiparams, **params)
> 912 type(object))
> 913 else:
> --> 914 return meth(self, multiparams, params)
> 915
> 916 def _execute_function(self, func, multiparams, params):
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/sql/elements.pyc
> in _execute_on_connection(self, connection, multiparams, params)
> 321
> 322 def _execute_on_connection(self, connection, multiparams, params):
> --> 323 return connection._execute_clauseelement(self, multiparams,
> params)
> 324
> 325 def unique_params(self, *optionaldict, **kwargs):
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
> in _execute_clauseelement(self, elem, multiparams, params)
> 1008 compiled_sql,
> 1009 distilled_params,
> -> 1010 compiled_sql, distilled_params
> 1011 )
> 1012 if self._has_events or self.engine._has_events:
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
> in _execute_context(self, dialect, constructor, statement, parameters, *args)
> 1144 parameters,
> 1145 cursor,
> -> 1146 context)
> 1147
> 1148 if self._has_events or self.engine._has_events:
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
> in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
> 1339 util.raise_from_cause(
> 1340 sqlalchemy_exception,
> -> 1341 exc_info
> 1342 )
> 1343 else:
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/util/compat.pyc
> in raise_from_cause(exception, exc_info)
> 197 exc_info = sys.exc_info()
> 198 exc_type, exc_value, exc_tb = exc_info
> --> 199 reraise(type(exception), exception, tb=exc_tb)
> 200
> 201 if py3k:
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc
> in _execute_context(self, dialect, constructor, statement, parameters, *args)
> 1137 statement,
> 1138 parameters,
> -> 1139 context)
> 1140 except Exception as e:
> 1141 self._handle_dbapi_exception(
> /Users/fmcquillan/anaconda/lib/python2.7/site-packages/sqlalchemy/engine/default.pyc
> in do_execute(self, cursor, statement, parameters, context)
> 448
> 449 def do_execute(self, cursor, statement, parameters, context=None):
> --> 450 cursor.execute(statement, parameters)
> 451
> 452 def do_execute_no_params(self, cursor, statement, context=None):
> InternalError: (psycopg2.InternalError) plpy.SPIError: syntax error at or
> near "," (plpython.c:4651)
> LINE 44: ... _src.array[creditamount, accountb...
> ^
> QUERY:
> SELECT
> 1 AS _iteration,
> madlib.array_to_1d((_state).centroids) AS centroids,
> (_state).old_centroid_ids,
> (_state).objective_fn,
> (_state).frac_reassigned
> FROM
> (
> SELECT (
> SELECT
> CAST((
> madlib.matrix_agg(
> _centroid::FLOAT8[]
> ORDER BY _new_centroid_id),
> array_agg(_new_centroid_id ORDER BY _new_centroid_id),
> sum(_objective_fn),
> CAST(sum(_num_reassigned) AS DOUBLE PRECISION)
> / sum(_num_points)
> ) AS madlib.kmeans_state)
> FROM (
> SELECT
> (_new_centroid).column_id AS _new_centroid_id,
> sum((_new_centroid).distance) AS _objective_fn,
> count(*) AS _num_points,
> sum(
> CAST(
> coalesce(
> (CAST(
> (SELECT (CAST
> ((madlib.array_to_2d($1), $2, $3, $4)
> AS madlib.kmeans_state)).old_centroid_ids) AS
> INTEGER[]
> ))[(_new_centroid).column_id + 1] !=
> _old_centroid_id,
> TRUE
> )
> AS INTEGER
> )
> ) AS _num_reassigned,
> madlib.avg(_point::FLOAT8[]) AS _centroid
> FROM (
> SELECT
> -- PostgreSQL/Greenplum tuning:
> -- VOLATILE function as optimization fence
> madlib.noop(),
> _src.array[creditamount, accountbalance] AS
> _point,
> madlib.closest_column(
> (SELECT (CAST ((madlib.array_to_2d($1), $2,
> $3, $4)
> AS madlib.kmeans_state)).centroids)
> , _src.array[creditamount,
> accountbalance]::FLOAT8[]
> , 'madlib.squared_dist_norm2'
>
> )
> AS _new_centroid,
> (madlib.closest_column((SELECT (CAST
> ((madlib.array_to_2d($5), $6, $7, $8)
> AS madlib.kmeans_state)).centroids)
> , _src.array[creditamount,
> accountbalance]::FLOAT8[]
> , 'madlib.squared_dist_norm2'
>
> )
> ).column_id
> AS _old_centroid_id
> FROM customers_train AS _src
> WHERE
> abs(coalesce(madlib.svec_elsum(array[creditamount, accountbalance]),
> 'Infinity'::FLOAT8)) < 'Infinity'::FLOAT8
> AND NOT
> madlib.array_contains_null(_src.array[creditamount, accountbalance]::FLOAT8[])
> ) AS _points_with_assignments
> GROUP BY (_new_centroid).column_id
> ) AS _new_centroids
> ) AS _state
> ) q
>
> CONTEXT: Traceback (most recent call last):
> PL/Python function "internal_compute_kmeans", line 22, in <module>
> return kmeans.compute_kmeans(**globals())
> PL/Python function "internal_compute_kmeans", line 332, in compute_kmeans
> PL/Python function "internal_compute_kmeans", line 227, in update
> PL/Python function "internal_compute_kmeans"
> SQL statement "SELECT madlib.internal_compute_kmeans( '_madlib_kmeans_args',
> '_madlib_kmeans_state', textin(regclassout( $1 )), $2 , textin(regprocout(
> $3 )))"
> PL/pgSQL function "kmeans" line 103 at assignment
> SQL statement "SELECT madlib.kmeans( $1 , $2 ,
> madlib.kmeans_random_seeding( $1 , $2 , $3 ), 'madlib.squared_dist_norm2',
> 'madlib.avg', 20, 0.001)"
> PL/pgSQL function "kmeans_random" line 4 at assignment
> [SQL: "SELECT madlib.kmeans_random('customers_train',\n
> 'array[creditamount, accountbalance]',\n 3\n );"]
> {code}
> The workaround is to create a view:
> {code}
> CREATE VIEW cluster_params AS (SELECT *, array[creditamount, accountbalance]
> as p1 FROM customers_train);
> SELECT madlib.kmeans_random('cluster_params',
> 'p1',
> 3
> );
> {code}
> produces
> {code}
> ("{{8619.6635514,3490.145919},{2343.72082019,6004.36435331},{2191.06698565,1908.8522488}}",9660868534.24,0.001,11)
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)