iyerr3 commented on a change in pull request #352: Feature/kd tree knn
URL: https://github.com/apache/madlib/pull/352#discussion_r255756210
 
 

 ##########
 File path: src/ports/postgres/modules/knn/knn.py_in
 ##########
 @@ -124,16 +137,312 @@ def knn_validate_src(schema_madlib, point_source, point_column_name, point_id,
             """.format(fn_dist=fn_dist, profunc=profunc))[0]['output']
 
         if is_invalid_func or (fn_dist not in dist_functions):
-            plpy.error("KNN error: Distance function has invalid signature "
-                       "or is not a simple function.")
-
+            plpy.error("KNN error: Distance function ({0}) has invalid signature "
+                       "or is not a simple function.".format(fn_dist))
+    if depth <= 0:
+        plpy.error("kNN Error: depth={0} is an invalid value, must be greater "
+                   "than 0.".format(depth))
+    if leaf_nodes <= 0:
+        plpy.error("kNN Error: leaf_nodes={0} is an invalid value, must be greater "
+                   "than 0.".format(leaf_nodes))
+    if pow(2,depth) <= leaf_nodes:
+        plpy.error("kNN Error: depth={0}, leaf_nodes={1} is not valid. "
+                   "The leaf_nodes value must be lower than 2^depth".format(depth, leaf_nodes))
     return k
 # ------------------------------------------------------------------------------
 
 
+def kd_tree(schema_madlib, source_table, output_table, point_column_name, depth,
+            r_id, dim, **kwargs):
+    """
+        KD-tree function to create a partitioning for KNN
+        Args:
+            @param schema_madlib        Name of the Madlib Schema
+            @param source_table         Training data table
+            @param output_table         Name of the table to store kd tree
+            @param point_column_name    Name of the column with training data
+                                        or expression that evaluates to a
+                                        numeric array
+            @param depth                Depth of the kd tree
+            @param r_id                 Name of the region id column
+            @param dim                  Name of the dimension column
+
+    """
+    with MinWarning("error"):
+
+        validate_kd_tree(source_table, output_table, point_column_name, depth)
+        n_features = num_features(source_table, point_column_name)
+
+        clauses = [" WHERE 1=1 "]
 
 Review comment:
   Do we need this to be `" WHERE 1=1 "`? How about just `'1=1'`, with the `WHERE`
added directly in the queries? That would avoid the fragile `[14:]` slice in line 213.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to