orhankislal commented on a change in pull request #441: Kmeans: simplified 
silhouette per point for k-means
URL: https://github.com/apache/madlib/pull/441#discussion_r325380668
 
 

 ##########
 File path: src/ports/postgres/modules/kmeans/kmeans.py_in
 ##########
 @@ -387,5 +396,62 @@ def compute_kmeans(schema_madlib, rel_args, rel_state, 
rel_source,
                             'old_centroid': old_centroid_str}))
     return iterationCtrl.iteration
 
+def simple_silhouette_points(schema_madlib, rel_source, output_table, pid,
+    expr_point, centroids, fn_dist, **kwargs):
+
+    with MinWarning("error"):
+        kmeans_validate_src(schema_madlib, rel_source)
+        output_tbl_valid(output_table, 'kmeans')
+
+        _assert(type(centroids) == list and
+                type(centroids[0]) == list and
+                len(centroids) > 1,
+                'kmeans: invalid centroids shape')
+
+        rel_source, expr_point = _create_temp_view_for_expr(schema_madlib,
+                                                            rel_source,
+                                                            expr_point)
+
+        plpy.execute("""
+            CREATE TABLE {output_table} AS
+                SELECT {pid}, centroids[1] AS centroid_id,
+                centroids[2] AS neighbor_centroid_id,
+                (CASE
+                    WHEN distances[2] = 0 THEN 0
+                    ELSE (distances[2] - distances[1]) / distances[2]
+                END) AS silh
+                FROM
+                (SELECT {pid},
+                       (cc_out).column_ids::integer[] AS centroids,
+                       (cc_out).distances::double precision[] AS distances
+                FROM (
+                    SELECT {pid},
+                           {schema_madlib}._closest_columns(
+                            array{centroids},
+                            {expr_point},
+                            2,
+                            '{fn_dist}'::REGPROC, '{fn_dist}') AS cc_out
+                    FROM {rel_source})q1
+                )q2
+            """.format(**locals()))
+
+def simple_silhouette_points_dbl_wrapper(schema_madlib, rel_source, 
output_table, pid,
 
 Review comment:
   Initially, I didn't have the wrapper, but it was getting confused for some
reason.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to