This is the primary patch that fiddles with the path recommendation
code. A few notes:
1) Because I want to keep track of how many remote destinations there
can be, the 'remote_guids' array now stores all remote destinations, not
just the ones we have already forwarded to.
2) Because I may need to free memory, I now "goto Exit" instead of
calling 'return' in many places.
3) Although the option is called 'port_offsetting', I actually "offset"
both the remote destination I send to and the port pointing towards that
remote destination.
Al
--
Albert Chu
[EMAIL PROTECTED]
925-422-5311
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
From 57eb4d9bf55fbbbf39dc1c7ddfeeb2cae4776ef0 Mon Sep 17 00:00:00 2001
From: Albert L. Chu <[EMAIL PROTECTED]>
Date: Thu, 20 Mar 2008 16:23:13 -0700
Subject: [PATCH] implement port_offsetting
Signed-off-by: Albert L. Chu <[EMAIL PROTECTED]>
---
opensm/include/opensm/osm_switch.h | 7 +-
opensm/opensm/osm_dump.c | 3 +-
opensm/opensm/osm_switch.c | 246 ++++++++++++++++++++++++++++++++----
opensm/opensm/osm_ucast_mgr.c | 14 ++-
4 files changed, 239 insertions(+), 31 deletions(-)
diff --git a/opensm/include/opensm/osm_switch.h b/opensm/include/opensm/osm_switch.h
index 2624d5f..45f4718 100644
--- a/opensm/include/opensm/osm_switch.h
+++ b/opensm/include/opensm/osm_switch.h
@@ -997,7 +997,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
IN const boolean_t dor,
IN OUT osm_switch_guid_count_t * remote_guids,
IN OUT uint16_t * p_num_remote_guids,
- IN OUT osm_switch_guid_count_t ** p_remote_guid_count_used);
+ IN OUT osm_switch_guid_count_t ** p_remote_guid_count_used,
+ IN uint16_t port_offsetting_lids_per_port);
/*
* PARAMETERS
* p_sw
@@ -1031,6 +1032,10 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
* [in out] The specific osm_switch_guid_count_t used
* in switch recommendations.
*
+* port_offsetting_lids_per_port
+* [in] Number of LIDs per port to use for the port_offsetting
+* option. If 0, port offsetting is not performed.
+*
* RETURN VALUE
* Returns the recommended port on which to route this LID.
*
diff --git a/opensm/opensm/osm_dump.c b/opensm/opensm/osm_dump.c
index 2bac75a..37842ac 100644
--- a/opensm/opensm/osm_dump.c
+++ b/opensm/opensm/osm_dump.c
@@ -219,7 +219,8 @@ static void dump_ucast_routes(cl_map_item_t *p_map_item, FILE *file, void *cxt)
/* No LMC Optimization */
best_port = osm_switch_recommend_path(p_sw, p_port,
lid_ho, TRUE, dor,
- NULL, NULL, NULL);
+ NULL, NULL,
+ NULL, 0);
fprintf(file, "No %u hop path possible via port %u!",
best_hops, best_port);
}
diff --git a/opensm/opensm/osm_switch.c b/opensm/opensm/osm_switch.c
index f346b25..8e3342b 100644
--- a/opensm/opensm/osm_switch.c
+++ b/opensm/opensm/osm_switch.c
@@ -55,6 +55,15 @@
#include <iba/ib_types.h>
#include <opensm/osm_switch.h>
+/* Local structs: per-remote-destination bookkeeping used by port offsetting */
+struct osm_switch_remote_dest {
+ uint32_t total_paths; /* sum of check_count over the ports recorded in ports[] */
+ uint32_t min_paths; /* smallest check_count seen among those ports */
+ uint8_t ports[256]; /* port numbers for this destination; 256 because max_ports is a uint8_t */
+ unsigned int ports_count; /* number of entries filled in ports[] */
+ osm_switch_guid_count_t *p_remote_guid; /* remote_guids entry identifying this destination */
+};
+
/**********************************************************************
**********************************************************************/
cl_status_t
@@ -304,9 +313,32 @@ osm_switch_find_guid_count(IN const osm_switch_t * const p_sw,
1);
}
+/**********************************************************************
+ **********************************************************************/
+
+/* greatest common divisor (Euclid's algorithm); returns a when b is 0 */
+static unsigned int
+_gcd(unsigned int a, unsigned int b)
+{
+	while (b) {
+		const unsigned int rem = a % b;
+
+		a = b;
+		b = rem;
+	}
+	return a;
+}
+
+/* least common multiple; 0 if a or b is 0 (divide first: limits overflow) */
+static unsigned int
+_lcm(unsigned int a, unsigned int b)
+{
+	return (a && b) ? (a / _gcd(a, b)) * b : 0;
+}
/**********************************************************************
**********************************************************************/
+
uint8_t
osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
IN osm_port_t * p_port,
@@ -315,7 +347,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
IN const boolean_t dor,
IN OUT osm_switch_guid_count_t * remote_guids,
IN OUT uint16_t * p_num_remote_guids,
- IN OUT osm_switch_guid_count_t ** p_remote_guid_count_used)
+ IN OUT osm_switch_guid_count_t ** p_remote_guid_count_used,
+ IN uint16_t port_offsetting_lids_per_port)
{
/*
We support an enhanced LMC aware routing mode:
@@ -356,6 +389,20 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
osm_node_t *p_rem_node;
osm_node_t *p_rem_node_first = NULL;
osm_switch_guid_count_t *p_remote_guid = NULL;
+ /*
+ These vars track information for port offsetting.
+ */
+ boolean_t port_offsetting = remote_guids && p_num_remote_guids
+ && p_remote_guid_count_used && port_offsetting_lids_per_port;
+ struct osm_switch_remote_dest * remote_dests = NULL;
+ struct osm_switch_remote_dest * p_remote_dest = NULL;
+ uint32_t num_remote_dests = 0;
+ uint32_t total_paths_count = 0;
+ uint32_t num_potential_ports = 0;
+ uint32_t lcm = 0;
+ uint32_t indx = 0;
+ boolean_t dest_found = FALSE;
+ unsigned int i;
CL_ASSERT(lid_ho > 0);
@@ -378,9 +425,22 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
num_ports = p_sw->num_ports;
+ if (port_offsetting) {
+ remote_dests = malloc(sizeof(struct osm_switch_remote_dest) * num_ports);
+ if (remote_dests == NULL) {
+ osm_log(p_sw->p_log, OSM_LOG_ERROR,
+ "osm_switch_recommend_path: "
+ "Cannot allocate array. Insufficient memory: "
+ "Disabling port_offsetting\n");
+ port_offsetting = 0;
+ }
+ }
+
least_hops = osm_switch_get_least_hops(p_sw, base_lid);
- if (least_hops == OSM_NO_PATH)
- return (OSM_NO_PATH);
+ if (least_hops == OSM_NO_PATH) {
+ best_port = OSM_NO_PATH;
+ goto Exit;
+ }
/*
First, inquire with the forwarding table for an existing
@@ -417,8 +477,10 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
in the forwarding tables that he wants to be overridden by the
minimum hop function.
*/
- if (hops == least_hops)
- return (port_num);
+ if (hops == least_hops) {
+ best_port = port_num;
+ goto Exit;
+ }
}
}
}
@@ -475,7 +537,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
port_num);
/* If not update the least hops for this case */
- if (!p_remote_guid) {
+ if (!p_remote_guid
+ || !p_remote_guid->forwarded_to) {
if (check_count < least_paths_other_sys) {
least_paths_other_sys = check_count;
best_port_other_sys = port_num;
@@ -489,7 +552,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
port_num);
/* If not update the least hops for this case */
- if (!p_remote_guid
+ if ((!p_remote_guid
+ || !p_remote_guid->forwarded_to)
&& check_count < least_paths_other_nodes) {
least_paths_other_nodes = check_count;
best_port_other_node = port_num;
@@ -498,6 +562,50 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
/* else prior sys and node guid already used */
} /* same sys found */
+
+ /* Store the new sys/node guid that we haven't seen yet */
+ if (!p_remote_guid) {
+ p_rem_physp = osm_physp_get_remote(p_physp);
+ p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
+ memcpy(&(remote_guids[*p_num_remote_guids].sys_guid),
+ &(p_rem_node->node_info.sys_guid),
+ sizeof(uint64_t));
+ memcpy(&(remote_guids[*p_num_remote_guids].node_guid),
+ &(p_rem_node->node_info.node_guid),
+ sizeof(uint64_t));
+ remote_guids[*p_num_remote_guids].forwarded_to = 0;
+ p_remote_guid = &remote_guids[*p_num_remote_guids];
+ (*p_num_remote_guids)++;
+ }
+ }
+
+ if (port_offsetting) {
+ /* Keep track of the destinations we've seen so far */
+ p_remote_dest = NULL;
+ for (i = 0; i < num_remote_dests; i++) {
+ if (!memcmp(p_remote_guid,
+ (&remote_dests[i])->p_remote_guid,
+ sizeof(struct osm_switch_remote_dest))) {
+ p_remote_dest = &remote_dests[i];
+ break;
+ }
+ }
+ if (!p_remote_dest) {
+ p_remote_dest = &remote_dests[num_remote_dests];
+ p_remote_dest->p_remote_guid = p_remote_guid;
+ p_remote_dest->total_paths = 0;
+ p_remote_dest->min_paths = 0xFFFFFFFF;
+ p_remote_dest->ports_count = 0;
+ num_remote_dests++;
+ }
+ p_remote_dest->total_paths += check_count;
+ if (check_count < p_remote_dest->min_paths)
+ p_remote_dest->min_paths = check_count;
+ p_remote_dest->ports[p_remote_dest->ports_count] = port_num;
+ p_remote_dest->ports_count++;
+
+ total_paths_count += check_count;
+ num_potential_ports++;
}
/* routing for LMC mode */
@@ -523,21 +631,23 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
best_port = port_num;
least_paths = check_count;
if (routing_for_lmc
- && p_remote_guid
+ && p_remote_guid->forwarded_to
&& p_remote_guid->forwarded_to < least_forwarded_to)
least_forwarded_to = p_remote_guid->forwarded_to;
}
else if (routing_for_lmc
- && p_remote_guid
&& check_count == least_paths
+ && p_remote_guid->forwarded_to
&& p_remote_guid->forwarded_to < least_forwarded_to) {
least_forwarded_to = p_remote_guid->forwarded_to;
best_port = port_num;
}
}
- if (port_found == FALSE)
- return (OSM_NO_PATH);
+ if (port_found == FALSE) {
+ best_port = OSM_NO_PATH;
+ goto Exit;
+ }
/*
if we are in enhanced routing mode and the best port is not
@@ -555,24 +665,110 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
remote_guids,
p_num_remote_guids,
best_port);
+ /* Must be stored */
+ CL_ASSERT(p_remote_guid);
+ *p_remote_guid_count_used = p_remote_guid;
+ }
- if (!p_remote_guid) {
- /* track the remote node and system of the port used. */
- p_physp = osm_node_get_physp_ptr(p_sw->p_node, best_port);
- p_rem_physp = osm_physp_get_remote(p_physp);
- p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
- memcpy(&(remote_guids[*p_num_remote_guids].sys_guid),
- &(p_rem_node->node_info.sys_guid),
- sizeof(uint64_t));
- memcpy(&(remote_guids[*p_num_remote_guids].node_guid),
- &(p_rem_node->node_info.node_guid),
- sizeof(uint64_t));
- remote_guids[*p_num_remote_guids].forwarded_to = 0;
- (*p_num_remote_guids)++;
+ /*
+ * As an example of what we're trying to do with port
+ * offsetting, assume LMC = 2 and we are trying to route
+ * the lids of 4 ports. The lids of these 4 ports are:
+ *
+ * (1,2,3,4)
+ * (5,6,7,8)
+ * (9,10,11,12)
+ * (13,14,15,16)
+ *
+ * Suppose forwarding to all these lids goes through
+ * 4 specific switch ports. If we just cycle through
+ * ports and lids in a normal iterative fashion, we would
+ * normally forward out ports in this manner.
+ *
+ * switch port 1: 1, 5, 9, 13
+ * switch port 2: 2, 6, 10, 14
+ * switch port 3: 3, 7, 11, 15
+ * switch port 4: 4, 8, 12, 16
+ *
+ * Note that the base lids of the ports (lids 1, 5, 9, 13)
+ * are all routed out of switch port 1. Thus, if the user
+ * only uses the base lid of each port, they will get pretty
+ * bad performance. We will try to get this layout instead.
+ *
+ * switch port 1: 1, 8, 11, 14
+ * switch port 2: 2, 5, 12, 15
+ * switch port 3: 3, 6, 9, 16
+ * switch port 4: 4, 7, 10, 13
+ *
+ * where switch ports are distributed in a more even manner.
+ * The base lid of each port is now distributed evenly
+ * across all 4 switch ports. The remaining lids are still
+ * distributed evenly across all the remaining switch ports.
+ *
+ * In order to accomplish this, we (effectively) will iterate
+ * through all ports like before, but instead of iterating from
+ * 0 to N-1 all the time, we will select the starting index
+ * based on the number of paths we have routed thus far.
+ */
+
+ /* We will not do port offsetting if num_potential_ports == 1
+ * b/c there is no offsetting that can be done.
+ */
+ if (port_offsetting
+ && best_port
+ && num_potential_ports > 1) {
+ /* Select which destination we want to forward to with our
+ * offsetting loop.
+ */
+ lcm = _lcm(port_offsetting_lids_per_port,
+ num_remote_dests);
+ indx = (total_paths_count / lcm) % num_remote_dests;
+ for (i = 0; i < num_remote_dests; i++) {
+ p_remote_dest = &remote_dests[indx];
+ p_remote_guid = p_remote_dest->p_remote_guid;
+
+ if (p_remote_guid->forwarded_to == least_forwarded_to
+ && p_remote_dest->min_paths == least_paths) {
+ dest_found = TRUE;
+ break;
+ }
+
+ indx++;
+ if (indx >= num_remote_dests)
+ indx = 0;
+ }
+
+ /* Then we "offset" within the potential ports we could
+ * forward out of for this specific destination.
+ */
+ if (dest_found && p_remote_dest) {
+ lcm = _lcm(port_offsetting_lids_per_port,
+ p_remote_dest->ports_count);
+ indx = (p_remote_dest->total_paths / lcm) % p_remote_dest->ports_count;
+ for (i = 0; i < p_remote_dest->ports_count; i++) {
+ port_num = p_remote_dest->ports[indx];
+ check_count =
+ osm_port_prof_path_count_get(&p_sw->p_prof[port_num]);
+ if (check_count == least_paths) {
+ if (best_port != port_num)
+ osm_log(p_sw->p_log, OSM_LOG_DEBUG,
+ "osm_switch_recommend_path: "
+ "best port offsetted: %d to %d\n",
+ best_port, port_num);
+ best_port = port_num;
+ *p_remote_guid_count_used = p_remote_dest->p_remote_guid;
+ break;
+ }
+ indx++;
+ if (indx >= p_remote_dest->ports_count)
+ indx = 0;
+ }
}
- *p_remote_guid_count_used = p_remote_guid;
}
+Exit:
+ if (remote_dests)
+ free(remote_dests);
return (best_port);
}
diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c
index 938db84..501c2c7 100644
--- a/opensm/opensm/osm_ucast_mgr.c
+++ b/opensm/opensm/osm_ucast_mgr.c
@@ -212,18 +212,23 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
osm_switch_guid_count_t *remote_guids = NULL;
uint16_t num_used_guids = 0;
osm_switch_guid_count_t *p_remote_guid_used = NULL;
+ uint16_t port_offsetting_lids_per_port = 0;
OSM_LOG_ENTER(p_mgr->p_log);
if (lids_per_port > 1) {
- remote_guids = malloc(sizeof(osm_switch_guid_count_t) * lids_per_port);
+ uint8_t num_ports = p_sw->num_ports;
+ remote_guids = malloc(sizeof(osm_switch_guid_count_t) * num_ports);
if (remote_guids == NULL) {
osm_log(p_mgr->p_log, OSM_LOG_ERROR,
"__osm_ucast_mgr_process_port: ERR 3A09: "
"Cannot allocate array. Insufficient memory\n");
goto Exit;
}
- memset(remote_guids, 0, sizeof(osm_switch_guid_count_t) * lids_per_port);
+ memset(remote_guids, 0, sizeof(osm_switch_guid_count_t) * num_ports);
+
+ if (p_mgr->p_subn->opt.port_offsetting)
+ port_offsetting_lids_per_port = lids_per_port;
}
osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
@@ -270,14 +275,15 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
p_mgr->is_dor,
remote_guids,
&num_used_guids,
- &p_remote_guid_used);
+ &p_remote_guid_used,
+ port_offsetting_lids_per_port);
}
else
port = osm_switch_recommend_path(p_sw, p_port, lid_ho,
p_mgr->p_subn->
ignore_existing_lfts,
p_mgr->is_dor,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, 0);
/*
There might be no path to the target
--
1.5.1
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general