Repository: incubator-madlib
Updated Branches:
  refs/heads/master 3f599c943 -> 06788cc48


Graph: Add initial set of centrality measures

JIRA: MADLIB-1073

This commit adds the following graph measures:
- Closeness (uses APSP)
- Graph diameter (uses APSP)
- Average path length (uses APSP)
- In/out degrees


Project: http://git-wip-us.apache.org/repos/asf/incubator-madlib/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-madlib/commit/06788cc4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-madlib/tree/06788cc4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-madlib/diff/06788cc4

Branch: refs/heads/master
Commit: 06788cc486fbd15391e3d477748a37fe0657baf1
Parents: 3f599c9
Author: Rahul Iyer <ri...@apache.org>
Authored: Mon Jul 31 13:54:48 2017 -0700
Committer: Rahul Iyer <ri...@apache.org>
Committed: Mon Jul 31 13:54:48 2017 -0700

----------------------------------------------------------------------
 doc/mainpage.dox.in                             |  23 +-
 src/ports/postgres/modules/graph/apsp.py_in     |  13 +-
 src/ports/postgres/modules/graph/measures.py_in | 639 +++++++++++++++
 .../postgres/modules/graph/measures.sql_in      | 816 +++++++++++++++++++
 .../postgres/modules/graph/test/measures.sql_in | 150 ++++
 5 files changed, 1623 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/06788cc4/doc/mainpage.dox.in
----------------------------------------------------------------------
diff --git a/doc/mainpage.dox.in b/doc/mainpage.dox.in
index de70d5d..30f566d 100644
--- a/doc/mainpage.dox.in
+++ b/doc/mainpage.dox.in
@@ -123,23 +123,24 @@ complete matrix stored as a distributed table.
 
         @defgroup grp_stemmer Stemming
         @ingroup grp_datatrans
-@defgroup grp_graph Graph
-@{Contains graph algorithms. @}
 
+@defgroup grp_graph Graph
+Contains graph algorithms.
+@{
     @defgroup grp_apsp All Pairs Shortest Path
-    @ingroup grp_graph
-
     @defgroup grp_bfs Breadth-First Search
-    @ingroup grp_graph
-    
+    @defgroup grp_graph_measures Measures
+    Graph Measures
+    @{
+        @defgroup grp_graph_closeness Closeness
+        @defgroup grp_graph_diameter Graph Diameter
+        @defgroup grp_graph_avg_path_length Average Path Length
+        @defgroup grp_graph_vertex_degrees In-Out degree
+    @}
     @defgroup grp_pagerank PageRank
-    @ingroup grp_graph
-
     @defgroup grp_sssp Single Source Shortest Path
-    @ingroup grp_graph
-
     @defgroup grp_wcc Weakly Connected Components
-    @ingroup grp_graph
+@}
 
 @defgroup grp_mdl Model Evaluation
 @{Contains functions for evaluating accuracy and validation of predictive 
methods. @}

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/06788cc4/src/ports/postgres/modules/graph/apsp.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/apsp.py_in 
b/src/ports/postgres/modules/graph/apsp.py_in
index ab8a566..42ee698 100644
--- a/src/ports/postgres/modules/graph/apsp.py_in
+++ b/src/ports/postgres/modules/graph/apsp.py_in
@@ -73,16 +73,16 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, 
edge_table,
 
         # Prepare the input for recording in the summary table
         if vertex_id is None:
-            v_st = "NULL"
+            v_st = ''
             vertex_id = "id"
         else:
             v_st = vertex_id
         if edge_args is None:
-            e_st = "NULL"
+            e_st = ''
         else:
             e_st = edge_args
         if grouping_cols is None:
-            g_st = "NULL"
+            g_st = ''
             glist = None
         else:
             g_st = grouping_cols
@@ -127,8 +127,7 @@ def graph_apsp(schema_madlib, vertex_table, vertex_id, 
edge_table,
         checkg_vv_sub = "TRUE"
         grp_by = ""
 
-        if grouping_cols is not None:
-
+        if grouping_cols:
             # We use actual table names in some cases and aliases in others
             # In some cases, we swap the table names so use of an alias is
             # necessary. In other cases, they are used to simplify debugging.
@@ -526,10 +525,10 @@ def graph_apsp_get_path(schema_madlib, apsp_table,
         dest = edge_params["dest"]
         weight = edge_params["weight"]
 
-        if vertex_id == "NULL":
+        if not vertex_id or vertex_id == "NULL":
             vertex_id = "id"
 
-        if grouping_cols == "NULL":
+        if not grouping_cols or grouping_cols == "NULL":
             grouping_cols = None
 
         select_grps = ""

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/06788cc4/src/ports/postgres/modules/graph/measures.py_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/measures.py_in 
b/src/ports/postgres/modules/graph/measures.py_in
new file mode 100644
index 0000000..f5c38b4
--- /dev/null
+++ b/src/ports/postgres/modules/graph/measures.py_in
@@ -0,0 +1,639 @@
+# coding=utf-8
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Graph Measures
+
+"""
+@file measures.py_in
+
+@namespace graph
+"""
+
+import plpy
+
+from utilities.control import MinWarning
+from utilities.utilities import _assert
+from utilities.utilities import add_postfix
+from utilities.utilities import extract_keyvalue_params
+
+from utilities.validate_args import get_cols
+from utilities.validate_args import unquote_ident
+from utilities.validate_args import table_exists
+from utilities.validate_args import table_is_empty
+from utilities.validate_args import columns_exist_in_table
+
+from graph_utils import get_graph_usage
+
+from collections import namedtuple
+from functools import partial
+
+
+class Graph(object):
+    """Class representing a Graph object"""
+    def __init__(self,
+                 vertex_table, vertex_col_names,
+                 edge_table, edge_col_names,
+                 grouping_cols=None,
+                 should_validate=True,
+                 schema_madlib='madlib'):
+        self.vertex_table = vertex_table
+        self.vertex_id_col = vertex_col_names['id']
+
+        self.edge_table = edge_table
+        self.edge_params = namedtuple('Edge', 'src dest weight')(
+            edge_col_names['src'], edge_col_names['dest'], 
edge_col_names['weight'])
+
+        self.grouping_cols = grouping_cols
+        self._madlib = schema_madlib
+
+        if should_validate:
+            self._validate()
+
+    # ----------------------------------------------------------------------
+
+    @staticmethod
+    def get_edge_params(edge_arg_str):
+        params_types = {'src': str, 'dest': str, 'weight': str}
+        default_args = {'src': 'src', 'dest': 'dest', 'weight': 'weight'}
+        return extract_keyvalue_params(edge_arg_str, params_types, 
default_args)
+    # ----------------------------------------------------------------------
+
+    def _validate(self):
+        _assert(self.vertex_table and
+                self.vertex_table.strip().lower() not in ('null', ''),
+                "Graph: Invalid vertex table name".format(**locals()))
+        _assert(table_exists(self.vertex_table),
+                "Graph: Vertex table ({0}) is 
missing".format(self.vertex_table))
+        _assert(not table_is_empty(self.vertex_table),
+                "Graph: Vertex table ({0}) is empty".format(self.vertex_table))
+
+        _assert(self.edge_table and self.edge_table.strip().lower() not in 
('null', ''),
+                "Graph: Invalid edge table name".format(**locals()))
+        _assert(table_exists(self.edge_table),
+                "Graph: Edge table ({0}) is missing".format(self.edge_table))
+        _assert(not table_is_empty(self.edge_table),
+                "Graph: Edge table ({0}) is empty".format(self.edge_table))
+
+        existing_cols = set(unquote_ident(i) for i in 
get_cols(self.vertex_table))
+        _assert(self.vertex_id_col in existing_cols,
+                "Graph: The vertex column {0} is not present in "
+                "vertex table ({1})".format(self.vertex_id_col, 
self.vertex_table))
+
+        _assert(columns_exist_in_table(self.edge_table, self.edge_params),
+                "Graph: Not all columns from {0} are present in edge table 
({1})".
+                format(self.edge_params, self.edge_table))
+    # ----------------------------------------------------------------------
+
+    def closeness(self, out_table, apsp_table, vertex_filter_expr=None, 
**kwargs):
+        """ Compute various centrality metrics
+
+        Following metrics are computed for a vertex, considering only
+        reachable vertices for that vertex:
+           - inverse_sum_dist: Inverse of the sum of shortest distances
+           - inverse_average_dist: Inverse of the average of shortest distances
+           - sum_inverse_dist: Sum of the inverse of shortest distances
+           - k_degree: Total number of reachable vertices
+
+        Args:
+            @param out_table: str. Name of table to store results in
+            @param apsp_table: str. APSP output is used to compute these 
metrics.
+            @param vertex_filter_expr: str. A WHERE clause can be specified to 
restrict
+                                       the output vertices
+        Returns:
+            None
+        """
+        grouping_cols_comma = self.grouping_cols + ", " if self.grouping_cols 
else ''
+        e = self.edge_params
+        filter_clause = "WHERE " + vertex_filter_expr if vertex_filter_expr 
else ''
+
+        plpy.execute("""
+            CREATE TABLE {out_table} AS
+            SELECT
+                {grouping_cols_comma}
+                {e.src},
+                1.0 / sum_dist AS inverse_sum_dist,
+                CASE WHEN k_degree = 0 THEN NULL
+                     ELSE k_degree::double precision / sum_dist
+                END AS inverse_avg_dist,
+                sum_inverse_dist,
+                k_degree
+            FROM (
+                -- For below measures treat 'Infinity' as NULL so that
+                -- 'sum' and 'avg' ignore the value.
+                SELECT
+                    {grouping_cols_comma}
+                    {e.src},
+                    sum(CASE WHEN {e.weight} = 'Infinity'::double precision 
THEN NULL
+                             WHEN {e.src} = {e.dest} THEN NULL
+                             ELSE {e.weight} END) AS sum_dist,
+                    sum(CASE WHEN {e.weight} = 'Infinity'::double precision 
THEN NULL
+                             WHEN {e.weight} = 0::double precision THEN 0.
+                             ELSE 1.0 / {e.weight} END) AS sum_inverse_dist,
+                    count(CASE WHEN {e.weight} = 'Infinity'::double precision 
THEN NULL
+                               WHEN {e.src} = {e.dest} THEN NULL
+                               ELSE 1 END) AS k_degree
+                FROM {apsp_table}
+                {filter_clause}
+                GROUP BY {grouping_cols_comma}
+                         {e.src}
+                -- Don't place the 'Infinity' checks above in a WHERE clause
+                -- since groups with only 'Infinity' rows need to show up
+                -- in output table
+            ) AS q
+        """.format(**locals()))
+    # 
--------------------------------------------------------------------------
+
+    def avg_path_length(self, out_table, apsp_table, vertex_filter_expr=None, 
**kwargs):
+        """ Compute the average path length of graph
+
+        This is the average of the shortest path distances between unique 
vertices.
+
+        Args:
+            @param out_table: str. Name of table to store results in
+            @param apsp_table: str. APSP output is used to compute these 
metrics.
+        Returns:
+            None
+        """
+        if self.grouping_cols:
+            grouping_cols_comma = self.grouping_cols + ", "
+            group_by_str = 'GROUP BY ' + self.grouping_cols
+        else:
+            grouping_cols_comma = group_by_str = ''
+        e = self.edge_params
+        filter_clause = "AND " + vertex_filter_expr if vertex_filter_expr else 
''
+        plpy.execute("""
+            CREATE TABLE {out_table} AS
+            SELECT
+                {grouping_cols_comma}
+                -- Filtering 'Infinity' occurs in CASE instead of WHERE clause
+                -- so that the edge is part of the average value i.e. part of
+                -- the count of paths but zero addition to sum of distances.
+                AVG(CASE WHEN {e.weight} = 'Infinity'::double precision
+                            THEN 0::double precision
+                            ELSE {e.weight}::double precision
+                    END) as avg_path_length
+            FROM {apsp_table}
+            WHERE {e.src} != {e.dest}
+                  {filter_clause}
+            {group_by_str}
+            """.format(**locals()))
+    # ----------------------------------------------------------------------
+
+    def in_out_degrees(self, out_table, **kwargs):
+        """
+        Args:
+            @param out_table: str. Name of table to store results
+
+        Returns:
+            None
+        """
+        # TODO: validate if output columns names are in grouping_cols
+        if self.grouping_cols:
+            grouping_cols_comma = self.grouping_cols + ", "
+        else:
+            grouping_cols_comma = ''
+        e = self.edge_params
+
+        plpy.execute("""
+            CREATE TABLE {out_table} AS
+            SELECT
+                {grouping_cols_comma}
+                in_q.vertex as {self.vertex_id_col},
+                indegree,
+                outdegree
+            FROM
+            (
+                SELECT
+                    {grouping_cols_comma}
+                    {e.dest} as vertex,
+                    count(*) as indegree
+                FROM {self.edge_table}
+                WHERE {e.src} != {e.dest}
+                GROUP BY {grouping_cols_comma}
+                         {e.dest}
+            ) as in_q
+            JOIN
+            (
+                SELECT
+                    {grouping_cols_comma}
+                    {e.src} as vertex,
+                    count(*) as outdegree
+                FROM {self.edge_table}
+                WHERE {e.src} != {e.dest}
+                GROUP BY {grouping_cols_comma}
+                         {e.src}
+            ) as out_q
+            USING ({grouping_cols_comma} vertex)
+            """.format(**locals()))
+    # 
--------------------------------------------------------------------------
+
+    def diameter(self, out_table, apsp_table, **kwargs):
+        """ Compute the diameter of graph
+
+        Diameter is defined as the maximum of the shortest path distances 
between
+        any pair of vertices
+        Args:
+            @param out_table: str. Name of table to store results in
+            @param apsp_table: str. APSP output is used to compute these 
metrics.
+        Returns:
+            None
+        """
+        if self.grouping_cols:
+            grouping_cols_comma = self.grouping_cols + ", "
+            group_by_str = 'GROUP BY ' + self.grouping_cols
+        else:
+            grouping_cols_comma = group_by_str = ''
+        e = self.edge_params
+        plpy.execute("""
+            CREATE TABLE {out_table} AS
+            SELECT
+                {grouping_cols_comma}
+                {e.weight} AS diameter,
+                {self._madlib}.matrix_agg(
+                        ARRAY[{e.src}, {e.dest}]::double 
precision[])::integer[]
+                    AS diameter_end_vertices
+            FROM
+                {apsp_table} JOIN
+                (
+                    SELECT
+                        {grouping_cols_comma}
+                        max({e.weight}) as {e.weight}
+                    FROM {apsp_table}
+                    WHERE {e.weight} != 'Infinity'::double precision
+                    {group_by_str}
+                ) q
+                USING ({grouping_cols_comma} {e.weight})
+            GROUP BY {grouping_cols_comma} {e.weight}
+            """.format(**locals()))
+# 
------------------------------------------------------------------------------
+
+
+def graph_apsp_measures(schema_madlib, apsp_table, out_table,
+                        measure_name, vertex_filter_expr=None, **kwargs):
+    """ Define measure that depend on APSP output
+
+    This function acts as a stub for all functions that depend on APSP being 
run
+    prior.
+
+    """
+
+    _assert(table_exists(apsp_table) and not table_is_empty(apsp_table),
+            "Graph: Invalid APSP table: {0}".format(apsp_table))
+
+    summary_table_name = add_postfix(apsp_table, "_summary")
+    summary_cols = ['vertex_table', 'vertex_id',
+                    'edge_table', 'edge_args', 'grouping_cols']
+    _assert(table_exists(summary_table_name),
+            "Graph: Summary APSP table ({0}) does not 
exist".format(summary_table_name))
+    _assert(columns_exist_in_table(summary_table_name, summary_cols, 
schema_madlib),
+            "Graph: Missing some columns from summary table 
({0})".format(summary_table_name))
+    _assert(out_table and out_table.strip().lower() not in ('null', ''),
+            "Graph: Invalid output table name ({0})".format(out_table))
+    _assert(not table_exists(out_table),
+            "Graph: Output table ({0}) already exists".format(out_table))
+
+    with MinWarning('warning'):
+        s = plpy.execute("SELECT * FROM {0}".format(summary_table_name))[0]
+        edge_col_names = Graph.get_edge_params(s['edge_args'])
+        g = Graph(s['vertex_table'], dict([('id', s['vertex_id'])]),
+                  s['edge_table'], edge_col_names,
+                  s['grouping_cols'],
+                  should_validate=False,
+                  schema_madlib=schema_madlib)
+        try:
+            measure_func = getattr(g, measure_name)
+            measure_func(out_table, apsp_table,
+                         vertex_filter_expr=vertex_filter_expr)
+        except AttributeError:
+            plpy.error('Measure {0} not implemented yet'.format(measure_name))
+# ----------------------------------------------------------------------
+
+
+graph_closeness = partial(graph_apsp_measures, measure_name='closeness')
+graph_diameter = partial(graph_apsp_measures, measure_name='diameter')
+graph_avg_path_length = partial(graph_apsp_measures, 
measure_name='avg_path_length')
+
+
+def graph_vertex_degrees(schema_madlib, vertex_table, vertex_id, edge_table,
+                         edge_args, out_table, grouping_cols, **kwargs):
+    """
+    Args:
+        @param schema_madlib: str. Name of MADlib schema
+        @param vertex_table: str. Table name containing vertex data
+        @param vertex_id: str. Column name containing ids of vertices
+        @param edge_table: str. Table name containing edge data
+        @param edge_args: str. Parameters describing edges
+        @param out_table: str. Name of table to store results
+        @param grouping_cols: str. Columns to group computation by
+
+
+    Returns:
+        None
+    """
+    _assert(out_table and out_table.strip().lower() not in ('null', ''),
+            "Graph: Invalid output table name!".format(**locals()))
+    _assert(not table_exists(out_table),
+            "Graph: Output table already exists!".format(**locals()))
+
+    if not vertex_id:
+        vertex_id = 'id'
+    edge_col_names = Graph.get_edge_params(edge_args)
+    g = Graph(vertex_table, dict([('id', vertex_id)]),
+              edge_table, edge_col_names, grouping_cols,
+              schema_madlib=schema_madlib)
+    g.in_out_degrees(out_table)
+# ----------------------------------------------------------------------
+
+# -----------------------------------------------------------------------
+# All help functions
+# -----------------------------------------------------------------------
+
+
+CREATE_GRAPH_EXAMPLE = """
+-- Create a graph, represented as vertex and edge tables.
+DROP TABLE IF EXISTS vertex,edge,out,out_summary,out_path;
+CREATE TABLE vertex(
+        id INTEGER
+        );
+CREATE TABLE edge(
+        src INTEGER,
+        dest INTEGER,
+        weight DOUBLE PRECISION
+);
+
+INSERT INTO vertex VALUES
+(0),
+(1),
+(2),
+(3),
+(4),
+(5),
+(6),
+(7)
+;
+INSERT INTO edge VALUES
+(0, 1, 1),
+(0, 2, 1),
+(0, 4, 10),
+(1, 2, 2),
+(1, 3, 10),
+(2, 3, 1),
+(2, 5, 1),
+(2, 6, 3),
+(3, 0, 1),
+(4, 0, -2),
+(5, 6, 1),
+(6, 7, 1)
+;
+"""
+
+COMPUTE_APSP_EXAMPLE = """
+-- Compute the apsp:
+DROP TABLE IF EXISTS out;
+SELECT graph_apsp(
+    'vertex',                            -- Vertex table
+    'id',                                -- Vertix id column
+    'edge',                              -- Edge table
+    'src=src, dest=dest, weight=weight', -- Comma delimited string of edge 
arguments
+    'out'                                -- Output table of apsp
+);
+"""
+
+
+def graph_closeness_help(schema_madlib, message, **kwargs):
+
+    intro = """
+The Closeness function returns various closeness centrality measures and the
+k-degree for given subset of vertices. The closeness measures are the inverse 
of
+the sum, the inverse of the average, and the sum of inverses of the shortest
+distances to all reachable target vertices (excluding the source vertex).
+    """
+
+    if not message:
+        help_string = intro + """
+For more details:
+    SELECT {schema_madlib}.graph_closeness('usage')
+        """
+    elif message.lower() in ['usage', 'help', '?']:
+        help_string = intro + """
+
+----------------------------------------------------------------------------
+                            USAGE
+----------------------------------------------------------------------------
+SELECT {schema_madlib}.graph_closeness(
+    apsp_table TEXT,            -- Name of table containing APSP results
+    out_table TEXT,             -- Name of table to store Closeness measuress
+    vertex_filter_expr TEXT     -- Valid PostgreSQL expression that describes 
the
+                                -- vertices to generate closeness measures for.
+)
+
+----------------------------------------------------------------------------
+                            OUTPUT
+----------------------------------------------------------------------------
+The output table contains a row for every vertex of every group and have
+the following columns (in addition to the grouping columns):
+  - inverse_sum_dist    : Inverse of the sum of shortest distances to all 
reachable
+                            vertices.
+  - inverse_average_dist: Inverse of the average of shortest distances to all
+                            reachable vertices.
+  - sum_inverse_dist    : Sum of the inverse of shortest distances to all 
reachable
+                            vertices.
+  - k_degree            : Total number of reachable vertices.
+
+        """
+    elif message.lower() in ['example', 'examples']:
+        help_string = """
+----------------------------------------------------------------------------
+                                EXAMPLES
+----------------------------------------------------------------------------
+{create_graph_example}
+{compute_apsp_example}
+
+-# Compute the closeness measure for all nodes:
+DROP TABLE IF EXISTS out_closeness;
+SELECT {schema_madlib}.graph_closeness('out_apsp', 'out_closeness');
+SELECT * FROM out_closeness;
+        """
+    else:
+        help_string = "No such option. Use {schema_madlib}.graph_closeness()"
+
+    return help_string.format(schema_madlib=schema_madlib,
+                              create_graph_example=CREATE_GRAPH_EXAMPLE,
+                              compute_apsp_example=COMPUTE_APSP_EXAMPLE)
+# -------------------------------------------------------------------------
+
+
+def graph_diameter_help(schema_madlib, message, **kwargs):
+
+    intro = """
+Diameter is defined as the longest of all shortest paths in a graph.
+    """
+
+    if not message:
+        help_string = intro + """
+For more details:
+    SELECT {schema_madlib}.graph_diameter('usage')
+        """
+    elif message.lower() in ['usage', 'help', '?']:
+        help_string = intro + """
+
+----------------------------------------------------------------------------
+                            USAGE
+----------------------------------------------------------------------------
+SELECT {schema_madlib}.graph_diameter(
+    apsp_table TEXT,            -- Name of table containing APSP results
+    out_table TEXT              -- Name of table to store Closeness measuress
+)
+
+----------------------------------------------------------------------------
+                            OUTPUT
+----------------------------------------------------------------------------
+It contains a row for every group, the diameter value and the two vertices
+that are the farthest apart.
+        """
+    elif message.lower() in ['example', 'examples']:
+        help_string = """
+----------------------------------------------------------------------------
+                                EXAMPLES
+----------------------------------------------------------------------------
+{create_graph_example}
+{compute_apsp_example}
+
+-# Compute the diameter measure for the graph:
+DROP TABLE IF EXISTS out_diameter;
+SELECT {schema_madlib}.graph_diameter('out_apsp', 'out_diameter');
+SELECT * FROM out_diameter;
+        """
+    else:
+        help_string = "No such option. Use {schema_madlib}.graph_diameter()"
+
+    return help_string.format(schema_madlib=schema_madlib,
+                              create_graph_example=CREATE_GRAPH_EXAMPLE,
+                              compute_apsp_example=COMPUTE_APSP_EXAMPLE)
+# -------------------------------------------------------------------------
+
+
+def graph_avg_path_length_help(schema_madlib, message, **kwargs):
+
+    intro = """
+This function computes the average of the shortest paths between each pair of
+vertices. Average path length is based on "reachable target vertices", so it
+ignores infinite-length paths between vertices that are not connected.
+    """
+
+    if not message:
+        help_string = intro + """
+For more details:
+    SELECT {schema_madlib}.graph_avg_path_length('usage')
+        """
+    elif message.lower() in ['usage', 'help', '?']:
+        help_string = intro + """
+
+----------------------------------------------------------------------------
+                            USAGE
+----------------------------------------------------------------------------
+SELECT {schema_madlib}.graph_avg_path_length(
+    apsp_table TEXT,            -- Name of table containing APSP results
+    out_table TEXT              -- Name of table to store Closeness measuress
+)
+
+----------------------------------------------------------------------------
+                            OUTPUT
+----------------------------------------------------------------------------
+It contains a row for every group, and the average path value.
+"""
+    elif message.lower() in ['example', 'examples']:
+        help_string = """
+----------------------------------------------------------------------------
+                                EXAMPLES
+----------------------------------------------------------------------------
+{create_graph_example}
+{compute_apsp_example}
+
+-# Compute the average path length for the graph:
+DROP TABLE IF EXISTS out_avg_path_length;
+SELECT {schema_madlib}.graph_avg_path_length('out_apsp', 
'out_avg_path_length');
+SELECT * FROM out_avg_path_length;
+        """
+    else:
+        help_string = "No such option. Use 
{schema_madlib}.graph_avg_path_length()"
+
+    return help_string.format(schema_madlib=schema_madlib,
+                              create_graph_example=CREATE_GRAPH_EXAMPLE,
+                              compute_apsp_example=COMPUTE_APSP_EXAMPLE)
+# -------------------------------------------------------------------------
+
+
+def graph_vertex_degrees_help(schema_madlib, message, **kwargs):
+
+    intro = """
+This function computes the degree of each node. The node degree is the number 
of
+edges adjacent to that node. The node in-degree is the number of edges pointing
+in to the node and node out-degree is the number of edges pointing out of the
+node.
+    """
+    usage_text = get_graph_usage(schema_madlib,
+                                 'graph_vertex_degrees',
+                                 """
+            out_table     TEXT, -- Name of the table to store the result of 
apsp.
+            grouping_cols TEXT  -- The list of grouping columns.""")
+
+    if not message:
+        help_string = intro + """
+For more details:
+    SELECT {schema_madlib}.graph_vertex_degrees('usage')
+        """
+    elif message.lower() in ['usage', 'help', '?']:
+        help_string = intro + """
+{graph_usage}
+
+----------------------------------------------------------------------------
+                            OUTPUT
+----------------------------------------------------------------------------
+It contains a row for every vertex of every group and has the following columns
+(in addition to the grouping columns):
+  - vertex      : The id for the source vertex. Will use the input vertex
+                    column 'id' for column naming.
+  - indegree    : Number of incoming edges to the vertex.
+  - outdegree   : Number of outgoing edges from the vertex.
+
+"""
+    elif message.lower() in ['example', 'examples']:
+        help_string = """
+----------------------------------------------------------------------------
+                                EXAMPLES
+----------------------------------------------------------------------------
+{create_graph_example}
+
+DROP TABLE IF EXISTS degrees;
+SELECT {schema_madlib}.graph_vertex_degrees(
+    'vertex',      -- Vertex table
+    'id',          -- Vertix id column (NULL means use default naming)
+    'edge',        -- Edge table
+    'src=src, dest=dest, weight=weight',
+    'degrees');        -- Output table of shortest paths
+SELECT * FROM degrees ORDER BY id;
+        """
+    else:
+        help_string = "No such option. Use 
{schema_madlib}.graph_vertex_degrees()"
+
+    return help_string.format(schema_madlib=schema_madlib,
+                              create_graph_example=CREATE_GRAPH_EXAMPLE,
+                              graph_usage=usage_text)
+# -------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/06788cc4/src/ports/postgres/modules/graph/measures.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/measures.sql_in 
b/src/ports/postgres/modules/graph/measures.sql_in
new file mode 100644
index 0000000..1992139
--- /dev/null
+++ b/src/ports/postgres/modules/graph/measures.sql_in
@@ -0,0 +1,816 @@
+/* ----------------------------------------------------------------------- 
*//**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * @file closeness.sql_in
+ *
+ * @brief SQL functions for graph analytics
+ * @date Jun 2017
+ *
+ * @sa Provides analytical measures for graphs
+ *
+ *//* ----------------------------------------------------------------------- 
*/
+m4_include(`SQLCommon.m4')
+
+/**
+@addtogroup grp_graph_closeness
+@brief Computes the closeness centrality value of each node in the graph.
+
+<div class="toc"><b>Contents</b>
+<ul>
+<li><a href="#closeness">Closeness</a></li>
+<li><a href="#examples">Examples</a></li>
+</ul>
+</div>
+
+The Closeness function returns various closeness centrality measures and the
+k-degree for given subset of vertices. The closeness measures are the inverse 
of
+the sum, the inverse of the average, and the sum of inverses of the shortest
+distances to all reachable target vertices (excluding the source vertex).
+
+@note The closeness measures require a valid output from a prior APSP run - 
both
+the APSP table and the associated output summary table. APSP is a
+computationally expensive algorithm because it finds the shortest path between
+all nodes in the graph. The worst case run-time for this implementation is 
O(V^2
+* E) where V is the number of vertices and E is the number of edges.  In
+practice, run-time will be generally be much less than this, depending on the
+graph.
+
+@anchor closeness
+@par Closeness
+<pre class="syntax">
+graph_closeness( apsp_table,
+                 output_table,
+                 vertex_filter_expr
+               )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+<dt>apsp_table</dt>
+<dd>TEXT. Name of the output table generated by a prior run of all pairs 
shortest path (APSP).
+</dd>
+
+<dt>out_table</dt>
+<dd>TEXT. Name of the table to store the closeness measures.
+It contains a row for every vertex of every group and have
+the following columns (in addition to the grouping columns):
+  - inverse_sum_dist: Inverse of the sum of shortest distances to all reachable
+  vertices.
+  - inverse_average_dist: Inverse of the average of shortest distances to all
+  reachable vertices.
+  - sum_inverse_dist: Sum of the inverse of shortest distances to all reachable
+  vertices.
+  - k_degree: Total number of reachable vertices.
+</dd>
+
+<dt>vertex_filter_expr (optional)</dt>
+
+<dd>TEXT, default = NULL. Valid PostgreSQL expression that describes the
+ vertices to generate closeness measures for.  If this parameter is not
+specified, closeness measures are generated for all vertices in the apsp table.
+This input should be treated like a WHERE clause.
+
+Some example inputs:
+- If you want a short list of vertices, say 1, 2 and 3:
+<pre>vertex_id IN (1, 2, 3)</pre>
+- If you want a range of vertices between 1000 and 2000:
+<pre>vertix_id BETWEEN 1000 AND 2000</pre>
+- If you want a set of vertices from a separate table satisfying to a condition
+<pre>EXISTS (SELECT vertex_id FROM vertices_of_interest
+             WHERE vertex_id > 5000 AND condition = 'xyz')
+</pre>
+
+</dd>
+</dl>
+
+@anchor examples
+@examp
+
+-# Create vertex and edge tables to represent the graph:
+<pre class="syntax">
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+        id INTEGER,
+        name TEXT
+        );
+CREATE TABLE edge(
+        src_id INTEGER,
+        dest_id INTEGER,
+        edge_weight FLOAT8
+        );
+INSERT INTO vertex VALUES
+(0, 'A'),
+(1, 'B'),
+(2, 'C'),
+(3, 'D'),
+(4, 'E'),
+(5, 'F'),
+(6, 'G'),
+(7, 'H');
+INSERT INTO edge VALUES
+(0, 1, 1.0),
+(0, 2, 1.0),
+(0, 4, 10.0),
+(1, 2, 2.0),
+(1, 3, 10.0),
+(2, 3, 1.0),
+(2, 5, 1.0),
+(2, 6, 3.0),
+(3, 0, 1.0),
+(4, 0, -2.0),
+(5, 6, 1.0),
+(6, 7, 1.0);
+</pre>
+
+-# Calculate the all-pair shortest paths:
+<pre class="syntax">
+DROP TABLE IF EXISTS out_apsp, out_apsp_summary;
+SELECT madlib.graph_apsp('vertex',      -- Vertex table
+                         'id',          -- Vertix id column (NULL means use 
default naming)
+                         'edge',        -- Edge table
+                         'src=src_id, dest=dest_id, weight=edge_weight',
+                                        -- Edge arguments (NULL means use 
default naming)
+                         'out_apsp');        -- Output table of shortest paths
+</pre>
+
+-# Compute the closeness measure for all nodes:
+<pre class="syntax">
+DROP TABLE IF EXISTS out_closeness;
+SELECT madlib.graph_closeness('out_apsp', 'out_closeness');
+SELECT * FROM out_closeness;
+</pre>
+<pre class="result">
+ src_id |  inverse_sum_dist  | inverse_avg_dist  | sum_inverse_dist | k_degree
+\--------+--------------------+-------------------+------------------+----------
+      1 | 0.0285714285714286 |               0.2 | 1.93809523809524 |        7
+      3 | 0.0357142857142857 |              0.25 | 2.87424242424242 |        7
+      4 |                 -1 |                -7 |               -1 |        7
+      0 | 0.0434782608695652 | 0.304347826086957 | 3.68333333333333 |        7
+      6 |                  1 |                 1 |                1 |        1
+      2 | 0.0416666666666667 | 0.291666666666667 |             3.75 |        7
+      5 |  0.333333333333333 | 0.666666666666667 |              1.5 |        2
+      7 |             [NULL] |            [NULL] |                0 |        0
+(8 rows)
+</pre>
+
+-# Create a graph with 2 groups and find APSP for each group:
+<pre class="syntax">
+DROP TABLE IF EXISTS edge_gr;
+CREATE TABLE edge_gr AS
+(
+  SELECT *, 0 AS grp FROM edge
+  UNION
+  SELECT *, 1 AS grp FROM edge WHERE src_id < 6 AND dest_id < 6
+);
+INSERT INTO edge_gr VALUES
+(4,5,-20,1);
+</pre>
+
+-# Find APSP for all groups:
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_apsp(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use 
default naming)
+                         'edge_gr',     -- Edge table
+                         'src=src_id, dest=dest_id, weight=edge_weight',
+                         'out_gr',      -- Output table of shortest paths
+                         'grp'          -- Grouping columns
+);
+</pre>
+
+-# Compute closeness measure for vertex 0 to vertex 5 in every group
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr_path;
+SELECT madlib.graph_closeness('out_gr', 'out_gr_closeness', 'src_id >= 0 and 
src_id <=5');
+SELECT * FROM out_gr_closeness ORDER BY grp;
+</pre>
+<pre class="result">
+ grp | src_id |  inverse_sum_dist   |  inverse_avg_dist  | sum_inverse_dist  | 
k_degree
+-------+----------+-----------------------+----------------------+---------------------+------------
+   0 |      0 |  0.0434782608695652 |  0.304347826086957 |  3.68333333333333 | 
       7
+   0 |      5 |   0.333333333333333 |  0.666666666666667 |               1.5 | 
       2
+   0 |      4 |                  -1 |                 -7 |                -1 | 
       7
+   0 |      3 |  0.0357142857142857 |               0.25 |  2.87424242424242 | 
       7
+   0 |      1 |  0.0285714285714286 |                0.2 |  1.93809523809524 | 
       7
+   0 |      2 |  0.0416666666666667 |  0.291666666666667 |              3.75 | 
       7
+   1 |      3 |   0.142857142857143 |  0.714285714285714 |  1.97979797979798 | 
       5
+   1 |      5 |              [NULL] |             [NULL] |                 0 | 
       0
+   1 |      0 |                0.25 |               1.25 |               2.5 | 
       5
+   1 |      1 |  0.0588235294117647 |  0.294117647058824 | 0.988095238095238 | 
       5
+   1 |      2 |                 0.1 |                0.5 |  1.79166666666667 | 
       5
+   1 |      4 | -0.0416666666666667 | -0.208333333333333 |             -2.55 | 
       5
+(12 rows)
+</pre>
+
+*/
+-------------------------------------------------------------------------
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_closeness(
+    apsp_table TEXT,
+    out_table TEXT,
+    vertex_filter_expr TEXT
+) RETURNS VOID AS $$
+    PythonFunction(graph, measures, graph_closeness)
+$$ LANGUAGE plpythonu VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+-------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_closeness(
+    apsp_table TEXT,
+    out_table TEXT
+) RETURNS VOID AS $$
+    SELECT MADLIB_SCHEMA.graph_closeness($1, $2, NULL);
+$$ LANGUAGE SQL VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+-------------------------------------------------------------------------------
+
+-- Online help
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_closeness(
+    message VARCHAR
+) RETURNS VARCHAR AS $$
+    PythonFunction(graph, measures, graph_closeness_help)
+$$ LANGUAGE plpythonu IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_closeness()
+RETURNS VARCHAR AS $$
+    SELECT MADLIB_SCHEMA.graph_closeness('');
+$$ LANGUAGE sql IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+/**
+@addtogroup grp_graph_diameter
+@brief Computes the diameter of a graph.
+
+<div class="toc"><b>Contents</b>
+<ul>
+<li><a href="#diameter">Diameter</a></li>
+<li><a href="#examples">Examples</a></li>
+</ul>
+</div>
+
+Diameter is defined as the longest of all shortest paths in a graph.
+
+@note This function assumes a valid output from a prior APSP run - both the 
APSP
+table and the associated output summary table. APSP is a computationally
+expensive algorithm because it finds the shortest path between all nodes in the
+graph. The worst case run-time for this implementation is O(V^2 * E) where V is
+the number of vertices and E is the number of edges.  In practice, run-time 
will
+be generally be much less than this, depending on the graph.
+
+@anchor diameter
+@par Diameter
+<pre class="syntax">
+graph_diameter( apsp_table,
+                output_table
+               )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+<dt>apsp_table</dt>
+<dd>TEXT. Name of the output table generated by a prior run of all pairs 
shortest path (APSP).
+</dd>
+
+<dt>out_table</dt>
+<dd>TEXT. Name of the table to store the diameter. It contains a row for every
+group, the diameter value and the two vertices that are the farthest apart.
+</dd>
+
+</dl>
+
+@anchor examples
+@examp
+
+-# Create vertex and edge tables to represent the graph:
+<pre class="syntax">
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+        id INTEGER,
+        name TEXT
+        );
+CREATE TABLE edge(
+        src_id INTEGER,
+        dest_id INTEGER,
+        edge_weight FLOAT8
+        );
+INSERT INTO vertex VALUES
+(0, 'A'),
+(1, 'B'),
+(2, 'C'),
+(3, 'D'),
+(4, 'E'),
+(5, 'F'),
+(6, 'G'),
+(7, 'H');
+INSERT INTO edge VALUES
+(0, 1, 1.0),
+(0, 2, 1.0),
+(0, 4, 10.0),
+(1, 2, 2.0),
+(1, 3, 10.0),
+(2, 3, 1.0),
+(2, 5, 1.0),
+(2, 6, 3.0),
+(3, 0, 1.0),
+(4, 0, -2.0),
+(5, 6, 1.0),
+(6, 7, 1.0);
+</pre>
+
+-# Calculate the all-pair shortest paths:
+<pre class="syntax">
+DROP TABLE IF EXISTS out_apsp, out_apsp_summary;
+SELECT madlib.graph_apsp('vertex',      -- Vertex table
+                         'id',          -- Vertix id column (NULL means use 
default naming)
+                         'edge',        -- Edge table
+                         'src=src_id, dest=dest_id, weight=edge_weight',
+                                        -- Edge arguments (NULL means use 
default naming)
+                         'out_apsp');        -- Output table of shortest paths
+</pre>
+
+-# Compute the diameter measure for the graph:
+<pre class="syntax">
+DROP TABLE IF EXISTS out_diameter;
+SELECT madlib.graph_diameter('out_apsp', 'out_diameter');
+SELECT * FROM out_diameter;
+</pre>
+<pre class="result">
+diameter | diameter_end_vertices
+\---------+-----------------------
+      14 | {{1,4}}
+(1 row)
+</pre>
+
+-# Create a graph with 2 groups and find APSP for each group:
+<pre class="syntax">
+DROP TABLE IF EXISTS edge_gr;
+CREATE TABLE edge_gr AS
+(
+  SELECT *, 0 AS grp FROM edge
+  UNION
+  SELECT *, 1 AS grp FROM edge WHERE src_id < 6 AND dest_id < 6
+);
+INSERT INTO edge_gr VALUES
+(4,5,-20,1);
+</pre>
+
+-# Find APSP for all groups:
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_apsp(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use 
default naming)
+                         'edge_gr',     -- Edge table
+                         'src=src_id, dest=dest_id, weight=edge_weight',
+                         'out_gr',      -- Output table of shortest paths
+                         'grp'          -- Grouping columns
+);
+</pre>
+
+-# Find the diameter of graph in every group
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr_path;
+SELECT madlib.graph_diameter('out_gr', 'out_gr_diameter');
+SELECT * FROM out_gr_diameter ORDER BY grp;
+</pre>
+<pre class="result">
+grp | diameter | diameter_end_vertices
+\------+------------+-------------------------
+  0 |       14 | {{1,4}}
+  1 |       14 | {{1,4}}
+(2 rows)
+</pre>
+
+*/
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_diameter(
+    apsp_table TEXT,
+    out_table TEXT
+) RETURNS VOID AS $$
+    PythonFunction(graph, measures, graph_diameter)
+$$ LANGUAGE plpythonu VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+-------------------------------------------------------------------------------
+
+-- Online help
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_diameter(
+    message VARCHAR
+) RETURNS VARCHAR AS $$
+    PythonFunction(graph, measures, graph_diameter_help)
+$$ LANGUAGE plpythonu IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_diameter()
+RETURNS VARCHAR AS $$
+    SELECT MADLIB_SCHEMA.graph_diameter('');
+$$ LANGUAGE sql IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+/**
+@addtogroup grp_graph_avg_path_length
+@brief Computes the average shortest-path length of a graph.
+
+<div class="toc"><b>Contents</b>
+<ul>
+<li><a href="#avg_path_length">Average Path Length</a></li>
+<li><a href="#examples">Examples</a></li>
+</ul>
+</div>
+
+This function computes the average of the shortest paths between each pair of
+vertices. Average path length is based on "reachable target vertices", so it
+ignores infinite-length paths between vertices that are not connected.
+
+@note This function assumes a valid output from a prior APSP run - both the 
APSP
+table and the associated output summary table. APSP is a computationally
+expensive algorithm because it finds the shortest path between all nodes in the
+graph. The worst case run-time for this implementation is O(V^2 * E) where V is
+the number of vertices and E is the number of edges.  In practice, run-time 
will
+be generally be much less than this, depending on the graph.
+
+@anchor avg_path_length
+@par Average Path Length
+<pre class="syntax">
+graph_avg_path_length( apsp_table,
+                       output_table
+                     )
+</pre>
+
+\b Arguments
+<dl class="arglist">
+<dt>apsp_table</dt>
+<dd>TEXT. Name of the output table generated by a prior run of all pairs
+shortest path (APSP).
+</dd>
+
+<dt>out_table</dt>
+<dd>TEXT. Name of the table to store the average path length. It contains a row
+for every group, and the average path value.
+</dd>
+</dl>
+
+@anchor examples
+@examp
+
+-# Create vertex and edge tables to represent the graph:
+<pre class="syntax">
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+        id INTEGER,
+        name TEXT
+        );
+CREATE TABLE edge(
+        src_id INTEGER,
+        dest_id INTEGER,
+        edge_weight FLOAT8
+        );
+INSERT INTO vertex VALUES
+(0, 'A'),
+(1, 'B'),
+(2, 'C'),
+(3, 'D'),
+(4, 'E'),
+(5, 'F'),
+(6, 'G'),
+(7, 'H');
+INSERT INTO edge VALUES
+(0, 1, 1.0),
+(0, 2, 1.0),
+(0, 4, 10.0),
+(1, 2, 2.0),
+(1, 3, 10.0),
+(2, 3, 1.0),
+(2, 5, 1.0),
+(2, 6, 3.0),
+(3, 0, 1.0),
+(4, 0, -2.0),
+(5, 6, 1.0),
+(6, 7, 1.0);
+</pre>
+
+-# Calculate the all-pair shortest paths:
+<pre class="syntax">
+DROP TABLE IF EXISTS out_apsp, out_apsp_summary;
+SELECT madlib.graph_apsp('vertex',      -- Vertex table
+                         'id',          -- Vertix id column (NULL means use 
default naming)
+                         'edge',        -- Edge table
+                         'src=src_id, dest=dest_id, weight=edge_weight',
+                                        -- Edge arguments (NULL means use 
default naming)
+                         'out_apsp');        -- Output table of shortest paths
+</pre>
+
+-# Compute the average path length measure:
+<pre class="syntax">
+DROP TABLE IF EXISTS out_avg_path_length;
+SELECT madlib.graph_avg_path_length('out_apsp', 'out_avg_path_length');
+SELECT * FROM out_avg_path_length;
+</pre>
+<pre class="result">
+ avg_path_length
+\------------------
+ 2.01785714285714
+(1 row)
+</pre>
+
+-# Create a graph with 2 groups and find APSP for each group:
+<pre class="syntax">
+DROP TABLE IF EXISTS edge_gr;
+CREATE TABLE edge_gr AS
+(
+  SELECT *, 0 AS grp FROM edge
+  UNION
+  SELECT *, 1 AS grp FROM edge WHERE src_id < 6 AND dest_id < 6
+);
+INSERT INTO edge_gr VALUES
+(4,5,-20,1);
+</pre>
+
+-# Find APSP for all groups:
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr, out_gr_summary;
+SELECT madlib.graph_apsp(
+                         'vertex',      -- Vertex table
+                         NULL,          -- Vertex id column (NULL means use 
default naming)
+                         'edge_gr',     -- Edge table
+                         'src=src_id, dest=dest_id, weight=edge_weight',
+                         'out_gr',      -- Output table of shortest paths
+                         'grp'          -- Grouping columns
+);
+</pre>
+
+-# Find the average path length in every group
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr_path;
+SELECT madlib.graph_avg_path_length('out_gr', 'out_gr_path');
+SELECT * FROM out_gr_path ORDER BY grp;
+</pre>
+<pre class="result">
+ grp |  avg_path_length
+\-------+--------------------
+   0 |  2.01785714285714
+   1 | 0.466666666666667
+(2 rows)
+</pre>
+*/
+
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_avg_path_length(
+    apsp_table TEXT,
+    out_table TEXT
+) RETURNS VOID AS $$
+    PythonFunction(graph, measures, graph_avg_path_length)
+$$ LANGUAGE plpythonu VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+-------------------------------------------------------------------------------
+
+-- Online help
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_avg_path_length(
+    message VARCHAR
+) RETURNS VARCHAR AS $$
+    PythonFunction(graph, measures, graph_avg_path_length_help)
+$$ LANGUAGE plpythonu IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_avg_path_length()
+RETURNS VARCHAR AS $$
+    SELECT MADLIB_SCHEMA.graph_avg_path_length('');
+$$ LANGUAGE sql IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+/**
+@addtogroup grp_graph_vertex_degrees
+@brief Computes the degrees for each vertex
+
+<div class="toc"><b>Contents</b>
+<ul>
+<li><a href="#degrees">In-out degrees</a></li>
+<li><a href="#examples">Examples</a></li>
+</ul>
+</div>
+
+This function computes the degree of each node. The node degree is the number 
of
+edges adjacent to that node. The node in-degree is the number of edges pointing
+in to the node and node out-degree is the number of edges pointing out of the
+node.
+
+@anchor degrees
+@par In-out degrees
+<pre class="syntax">
+graph_vertex_degrees(
+    vertex_table,
+    vertex_id,
+    edge_table,
+    edge_args,
+    out_table,
+    grouping_cols
+)
+</pre>
+
+\b Arguments
+<dl class="arglist">
+<dt>vertex_table</dt>
+<dd>TEXT. Name of the table containing the vertex data for the graph. Must
+contain the column specified in the 'vertex_id' parameter below.</dd>
+
+<dt>vertex_id</dt>
+<dd>TEXT, default = 'id'. Name of the column in 'vertex_table' containing
+vertex ids.  The vertex ids are of type INTEGER with no duplicates.
+They do not need to be contiguous.</dd>
+
+<dt>edge_table</dt>
+<dd>TEXT. Name of the table containing the edge data. The edge table must
+contain columns for source vertex, destination vertex and edge weight.
+Column naming convention is described below in the 'edge_args' parameter.</dd>
+
+<dt>edge_args</dt>
+<dd>TEXT. A comma-delimited string containing multiple named arguments of
+the form "name=value". The following parameters are supported for
+this string argument:
+  - src (INTEGER): Name of the column containing the source vertex ids in the
+  edge table. Default column name is 'src'.
+  - dest (INTEGER): Name of the column containing the destination vertex ids
+  in the edge table. Default column name is 'dest'.
+  - weight (FLOAT8): Name of the column containing the edge weights in the
+  edge table. Default column name is 'weight'.</dd>
+
+<dt>out_table</dt>
+<dd>TEXT. Name of the table to store the result.
+It contains a row for every vertex of every group and has
+the following columns (in addition to the grouping columns):
+  - vertex: The id for the source vertex. Will use the input vertex
+  column 'id' for column naming.
+  - indegree: Number of incoming edges to the vertex.
+  - outdegree: Number of outgoing edges from the vertex.
+
+<dt>grouping_cols</dt>
+<dd>TEXT, default = NULL. List of columns used to group the input into discrete
+subgraphs. These columns must exist in the edge table. When this value is null,
+no grouping is used and a single result is generated. </dd>
+</dl>
+
+@anchor examples
+@examp
+
+-# Create vertex and edge tables to represent the graph:
+<pre class="syntax">
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+        id INTEGER,
+        name TEXT
+        );
+CREATE TABLE edge(
+        src_id INTEGER,
+        dest_id INTEGER,
+        edge_weight FLOAT8
+        );
+INSERT INTO vertex VALUES
+(0, 'A'),
+(1, 'B'),
+(2, 'C'),
+(3, 'D'),
+(4, 'E'),
+(5, 'F'),
+(6, 'G'),
+(7, 'H');
+INSERT INTO edge VALUES
+(0, 1, 1.0),
+(0, 2, 1.0),
+(0, 4, 10.0),
+(1, 2, 2.0),
+(1, 3, 10.0),
+(2, 3, 1.0),
+(2, 5, 1.0),
+(2, 6, 3.0),
+(3, 0, 1.0),
+(4, 0, -2.0),
+(5, 6, 1.0),
+(6, 7, 1.0);
+</pre>
+
+-# Calculate the in-out degrees for each node:
+<pre class="syntax">
+DROP TABLE IF EXISTS degrees;
+SELECT madlib.graph_vertex_degrees(
+    'vertex',      -- Vertex table
+    'id',          -- Vertix id column (NULL means use default naming)
+    'edge',        -- Edge table
+    'src=src_id, dest=dest_id, weight=edge_weight',
+    'degrees');        -- Output table of shortest paths
+SELECT * FROM degrees ORDER BY id;
+</pre>
+<pre class="result">
+ id | indegree | outdegree
+\----+----------+-----------
+  0 |        2 |         3
+  1 |        1 |         2
+  2 |        2 |         3
+  3 |        2 |         1
+  4 |        1 |         1
+  5 |        1 |         1
+  6 |        2 |         1
+(7 rows)
+</pre>
+
+-# Create a graph with 2 groups and find degrees for each group:
+<pre class="syntax">
+DROP TABLE IF EXISTS edge_gr;
+CREATE TABLE edge_gr AS
+(
+  SELECT *, 0 AS grp FROM edge
+  UNION
+  SELECT *, 1 AS grp FROM edge WHERE src_id < 6 AND dest_id < 6
+);
+INSERT INTO edge_gr VALUES
+(4,5,-20,1);
+</pre>
+
+-# Find APSP for all groups:
+<pre class="syntax">
+DROP TABLE IF EXISTS out_gr;
+SELECT madlib.graph_vertex_degrees(
+    'vertex',      -- Vertex table
+    NULL,          -- Vertex id column (NULL means use default naming)
+    'edge_gr',     -- Edge table
+    'src=src_id, dest=dest_id, weight=edge_weight',
+    'out_gr',      -- Output table of shortest paths
+    'grp'          -- Grouping columns
+);
+SELECT * FROM out_gr WHERE id < 2 ORDER BY grp, id;
+</pre>
+<pre class="result">
+ grp | id | indegree |   outdegree
+\-------+------+------------+-------------
+   0 |  0 |        2 |         3
+   0 |  1 |        1 |         2
+   1 |  0 |        2 |         3
+   1 |  1 |        1 |         2
+(4 rows)
+</pre>
+*/
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_vertex_degrees(
+    vertex_table            TEXT,
+    vertex_id               TEXT,
+    edge_table              TEXT,
+    edge_args               TEXT,
+    out_table               TEXT,
+    grouping_cols           TEXT
+) RETURNS VOID AS $$
+    PythonFunction(graph, measures, graph_vertex_degrees)
+$$ LANGUAGE plpythonu VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+-------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_vertex_degrees(
+    vertex_table            TEXT,
+    vertex_id               TEXT,
+    edge_table              TEXT,
+    edge_args               TEXT,
+    out_table               TEXT
+) RETURNS VOID AS $$
+     SELECT MADLIB_SCHEMA.graph_vertex_degrees($1, $2, $3, $4, $5, NULL);
+$$ LANGUAGE sql VOLATILE
+m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
+-------------------------------------------------------------------------------
+
+-- Online help
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_vertex_degrees(
+    message VARCHAR
+) RETURNS VARCHAR AS $$
+    PythonFunction(graph, measures, graph_vertex_degrees_help)
+$$ LANGUAGE plpythonu IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
+
+--------------------------------------------------------------------------------
+
+CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.graph_vertex_degrees()
+RETURNS VARCHAR AS $$
+    SELECT MADLIB_SCHEMA.graph_vertex_degrees('');
+$$ LANGUAGE sql IMMUTABLE
+m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');

http://git-wip-us.apache.org/repos/asf/incubator-madlib/blob/06788cc4/src/ports/postgres/modules/graph/test/measures.sql_in
----------------------------------------------------------------------
diff --git a/src/ports/postgres/modules/graph/test/measures.sql_in 
b/src/ports/postgres/modules/graph/test/measures.sql_in
new file mode 100644
index 0000000..b0bf95e
--- /dev/null
+++ b/src/ports/postgres/modules/graph/test/measures.sql_in
@@ -0,0 +1,150 @@
+/* ----------------------------------------------------------------------- 
*//**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+/* ----------------------------------------------------------------------- */
+
+-- Create vertex and edge tables to represent the graph:
+DROP TABLE IF EXISTS vertex, edge;
+CREATE TABLE vertex(
+        id INTEGER,
+        name TEXT
+        );
+CREATE TABLE edge(
+        src_id INTEGER,
+        dest_id INTEGER,
+        edge_weight FLOAT8
+        );
+INSERT INTO vertex VALUES
+(0, 'A'),
+(1, 'B'),
+(2, 'C'),
+(3, 'D'),
+(4, 'E'),
+(5, 'F'),
+(6, 'G'),
+(7, 'H');
+INSERT INTO edge VALUES
+(0, 1, 1.0),
+(0, 2, 1.0),
+(0, 4, 10.0),
+(1, 2, 2.0),
+(1, 3, 10.0),
+(2, 3, 1.0),
+(2, 5, 1.0),
+(2, 6, 3.0),
+(3, 0, 1.0),
+(4, 0, -2.0),
+(5, 6, 1.0),
+(6, 7, 1.0);
+
+-- Calculate the all-pair shortest paths:
+DROP TABLE IF EXISTS out_apsp, out_apsp_summary;
+SELECT graph_apsp('vertex',      -- Vertex table
+                  'id',          -- Vertix id column (NULL means use default 
naming)
+                  'edge',        -- Edge table
+                  'src=src_id, dest=dest_id, weight=edge_weight',
+                                 -- Edge arguments (NULL means use default 
naming)
+                  'out_apsp');   -- Output table of shortest paths
+
+-- Compute the closeness measure for all nodes:
+DROP TABLE IF EXISTS out_closeness;
+SELECT graph_closeness('out_apsp', 'out_closeness');
+SELECT * FROM out_closeness;
+
+SELECT assert(relative_error(inverse_sum_dist, 0.04347) < 1e-2 and
+              relative_error(inverse_avg_dist, 0.3043) < 1e-2 and
+              relative_error(sum_inverse_dist, 3.6833) < 1e-2 and
+              k_degree = 7,
+              'Incorrect value for closeness')
+FROM out_closeness
+WHERE src_id = 0;
+
+-- Compute the diameter measure for graph
+DROP TABLE IF EXISTS out_diameter;
+SELECT graph_diameter('out_apsp', 'out_diameter');
+SELECT * FROM out_diameter;
+SELECT assert(diameter=14, 'Invalid value for diameter') FROM out_diameter;
+
+-- Compute the average path length measure for graph
+DROP TABLE IF EXISTS out_avg_path_length;
+SELECT graph_avg_path_length('out_apsp', 'out_avg_path_length');
+SELECT * FROM out_avg_path_length;
+SELECT assert(relative_error(avg_path_length, 2.0178) < 1e-2,
+              'Invalid value for avg_path_length') FROM out_avg_path_length;
+
+-- Compute the in and out degrees
+DROP TABLE IF EXISTS out_degrees;
+SELECT graph_vertex_degrees('vertex',      -- Vertex table
+                     'id',          -- Vertix id column (NULL means use 
default naming)
+                     'edge',        -- Edge table
+                     'src=src_id, dest=dest_id, weight=edge_weight',
+                                 -- Edge arguments (NULL means use default 
naming)
+                     'out_degrees');
+SELECT * FROM out_degrees;
+SELECT assert(indegree = 2 and outdegree = 3, 'Invalid value for degrees')
+FROM out_degrees
+WHERE id = 0;
+-------------------------------------------------------------------------
+-- Grouping -----------------------------------------------------------
+------------------------------------------------------------------------------
+
+-- Create a graph with 2 groups and find APSP for each group:
+DROP TABLE IF EXISTS edge_gr;
+CREATE TABLE edge_gr AS
+(
+  SELECT *, 0 AS grp FROM edge
+  UNION
+  SELECT *, 1 AS grp FROM edge WHERE src_id < 6 AND dest_id < 6
+);
+INSERT INTO edge_gr VALUES
+(4,5,-20,1);
+
+-- Find APSP for all groups:
+DROP TABLE IF EXISTS out_apsp, out_apsp_summary;
+SELECT graph_apsp('vertex',      -- Vertex table
+                  'id',          -- Vertex id column (NULL means use default 
naming)
+                  'edge_gr',     -- Edge table
+                  'src=src_id, dest=dest_id, weight=edge_weight',
+                  'out_apsp',      -- Output table of shortest paths
+                  'grp'          -- Grouping columns
+);
+
+DROP TABLE IF EXISTS out_closeness;
+SELECT graph_closeness('out_apsp', 'out_closeness');
+SELECT * FROM out_closeness ORDER BY grp, src_id;
+
+DROP TABLE IF EXISTS out_diameter;
+SELECT graph_diameter('out_apsp', 'out_diameter');
+SELECT * FROM out_diameter ORDER BY grp;
+
+-- Compute the closeness measure for all nodes:
+DROP TABLE IF EXISTS out;
+SELECT graph_avg_path_length('out_apsp', 'out');
+SELECT * FROM out ORDER BY grp;
+
+-- Compute the closeness measure for all nodes:
+DROP TABLE IF EXISTS out_degrees;
+SELECT graph_vertex_degrees('vertex',      -- Vertex table
+                     'id',          -- Vertix id column (NULL means use 
default naming)
+                     'edge_gr',        -- Edge table
+                     'src=src_id, dest=dest_id, weight=edge_weight',
+                     'out_degrees',
+                     'grp');
+SELECT * FROM out_degrees ORDER BY grp, id;

Reply via email to