Github user njayaram2 commented on a diff in the pull request:
https://github.com/apache/incubator-madlib/pull/148#discussion_r126478640
--- Diff: src/ports/postgres/modules/graph/graph_utils.py_in ---
@@ -27,104 +27,103 @@
@namespace graph
"""
-import plpy
-from utilities.control import MinWarning
from utilities.utilities import _assert
-from utilities.utilities import extract_keyvalue_params
-from utilities.utilities import unique_string
from utilities.validate_args import get_cols
from utilities.validate_args import unquote_ident
from utilities.validate_args import table_exists
from utilities.validate_args import columns_exist_in_table
from utilities.validate_args import table_is_empty
+
def _check_groups(tbl1, tbl2, grp_list):
+ """
+ Helper function for joining tables with groups.
+ Args:
+ @param tbl1 Name of the first table
+ @param tbl2 Name of the second table
+ @param grp_list The list of grouping columns
+ """
- """
- Helper function for joining tables with groups.
- Args:
- @param tbl1 Name of the first table
- @param tbl2 Name of the second table
- @param grp_list The list of grouping columns
- """
+ return ' AND '.join([" {tbl1}.{i} = {tbl2}.{i} ".format(**locals())
+ for i in grp_list])
- return ' AND '.join([" {tbl1}.{i} = {tbl2}.{i} ".format(**locals())
- for i in grp_list])
def _grp_from_table(tbl, grp_list):
+ """
+ Helper function for selecting grouping columns of a table
+ Args:
+ @param tbl Name of the table
+ @param grp_list The list of grouping columns
+ """
+ return ' , '.join([" {tbl}.{i} ".format(**locals())
+ for i in grp_list])
- """
- Helper function for selecting grouping columns of a table
- Args:
- @param tbl Name of the table
- @param grp_list The list of grouping columns
- """
- return ' , '.join([" {tbl}.{i} ".format(**locals())
- for i in grp_list])
def validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params,
- out_table, func_name, **kwargs):
- """
- Validates graph tables (vertex and edge) as well as the output table.
- """
- _assert(out_table and out_table.strip().lower() not in ('null', ''),
- "Graph {func_name}: Invalid output table
name!".format(**locals()))
- _assert(not table_exists(out_table),
- "Graph {func_name}: Output table already
exists!".format(**locals()))
-
- _assert(vertex_table and vertex_table.strip().lower() not in ('null',
''),
- "Graph {func_name}: Invalid vertex table
name!".format(**locals()))
- _assert(table_exists(vertex_table),
- "Graph {func_name}: Vertex table ({vertex_table}) is
missing!".format(
- **locals()))
- _assert(not table_is_empty(vertex_table),
- "Graph {func_name}: Vertex table ({vertex_table}) is
empty!".format(
- **locals()))
-
- _assert(edge_table and edge_table.strip().lower() not in ('null', ''),
- "Graph {func_name}: Invalid edge table
name!".format(**locals()))
- _assert(table_exists(edge_table),
- "Graph {func_name}: Edge table ({edge_table}) is
missing!".format(
- **locals()))
- _assert(not table_is_empty(edge_table),
- "Graph {func_name}: Edge table ({edge_table}) is empty!".format(
- **locals()))
-
- existing_cols = set(unquote_ident(i) for i in get_cols(vertex_table))
- _assert(vertex_id in existing_cols,
- """Graph {func_name}: The vertex column {vertex_id} is not
present in vertex table ({vertex_table}) """.
- format(**locals()))
- _assert(columns_exist_in_table(edge_table, edge_params.values()),
- """Graph {func_name}: Not all columns from {cols} are present
in edge table ({edge_table})""".
- format(cols=edge_params.values(), **locals()))
-
- return None
+ out_table, func_name, **kwargs):
+ """
+ Validates graph tables (vertex and edge) as well as the output table.
+ """
+ _assert(out_table and out_table.strip().lower() not in ('null', ''),
+ "Graph {func_name}: Invalid output table
name!".format(**locals()))
+ _assert(not table_exists(out_table),
+ "Graph {func_name}: Output table already
exists!".format(**locals()))
+
+ _assert(vertex_table and vertex_table.strip().lower() not in ('null',
''),
+ "Graph {func_name}: Invalid vertex table
name!".format(**locals()))
+ _assert(table_exists(vertex_table),
+ "Graph {func_name}: Vertex table ({vertex_table}) is
missing!".format(
+ **locals()))
+ _assert(not table_is_empty(vertex_table),
+ "Graph {func_name}: Vertex table ({vertex_table}) is
empty!".format(
+ **locals()))
+
+ _assert(edge_table and edge_table.strip().lower() not in ('null', ''),
+ "Graph {func_name}: Invalid edge table
name!".format(**locals()))
+ _assert(table_exists(edge_table),
+ "Graph {func_name}: Edge table ({edge_table}) is
missing!".format(
+ **locals()))
+ _assert(not table_is_empty(edge_table),
+ "Graph {func_name}: Edge table ({edge_table}) is
empty!".format(
+ **locals()))
+
+ existing_cols = set(unquote_ident(i) for i in get_cols(vertex_table))
+ _assert(vertex_id in existing_cols,
+ """Graph {func_name}: The vertex column {vertex_id} is not
present in vertex table ({vertex_table}) """.
+ format(**locals()))
+ _assert(columns_exist_in_table(edge_table, edge_params.values()),
+ """Graph {func_name}: Not all columns from {cols} are present
in edge table ({edge_table})""".
+ format(cols=edge_params.values(), **locals()))
+
+ return None
+
def get_graph_usage(schema_madlib, func_name, other_text):
- usage = """
-----------------------------------------------------------------------------
- USAGE
-----------------------------------------------------------------------------
- SELECT {schema_madlib}.{func_name}(
- vertex_table TEXT, -- Name of the table that contains the vertex data.
- vertex_id TEXT, -- Name of the column containing the vertex ids.
- edge_table TEXT, -- Name of the table that contains the edge data.
- edge_args TEXT{comma} -- A comma-delimited string containing
multiple
- -- named arguments of the form "name=value".
- {other_text}
-);
-
-The following parameters are supported for edge table arguments
('edge_args'
- above):
-
-src (default = 'src') : Name of the column containing the
source
- vertex ids in the edge table.
-dest (default = 'dest') : Name of the column containing the
destination
- vertex ids in the edge table.
-weight (default = 'weight') : Name of the column containing the
weight of
- edges in the edge table.
-""".format(schema_madlib=schema_madlib, func_name=func_name,
- other_text=other_text, comma = ',' if other_text is not None else ' ')
-
- return usage
+ usage = """
+
----------------------------------------------------------------------------
+ USAGE
+
----------------------------------------------------------------------------
+ SELECT {schema_madlib}.{func_name}(
+ vertex_table TEXT, -- Name of the table that contains the vertex
data.
+ vertex_id TEXT, -- Name of the column containing the vertex
ids.
+ edge_table TEXT, -- Name of the table that contains the edge
data.
+ edge_args TEXT{comma} -- A comma-delimited string containing
multiple
+ -- named arguments of the form "name=value".
+ {other_text}
--- End diff --
`other_text` is also a string that is passed to this function. Shouldn't
that string also be indented by one tab stop, like the rest of the usage text?
For instance, this is what the help for `sssp` looks like now:
```
graph_sssp
----------------------------------------------------------------------------------
Given a graph and a source vertex, single source shortest path (SSSP)
algorithm finds a path for every vertex such that the sum of the
weights of its constituent edges is minimized.
----------------------------------------------------------------------------
USAGE
----------------------------------------------------------------------------
SELECT madlib.graph_sssp(
vertex_table TEXT, -- Name of the table that contains the vertex
data.
vertex_id TEXT, -- Name of the column containing the vertex
ids.
edge_table TEXT, -- Name of the table that contains the edge
data.
edge_args TEXT, -- A comma-delimited string containing
multiple
-- named arguments of the form "name=value".
source_vertex INT, -- The source vertex id for the algorithm to
start.
out_table TEXT, -- Name of the table to store the result of SSSP.
grouping_cols TEXT -- The list of grouping columns.
);
```
The descriptions of `out_table` and `grouping_cols` come from the
`other_text` param.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---