This is the primary patch that fiddles with the path recommendation
code.  A few notes:

1) Because I want to keep track of how many remote destinations there
can be, the 'remote_guids' array now stores all remote destinations, not
just the ones we have already forwarded to.

2) Because I may need to free memory, I now "goto Exit" instead of
calling 'return' in many places.

3) Although the option is called 'port_offsetting', I actually "offset"
both the remote destination I send to and the port pointing towards that
remote destination.

Al

-- 
Albert Chu
[EMAIL PROTECTED]
925-422-5311
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
>From 57eb4d9bf55fbbbf39dc1c7ddfeeb2cae4776ef0 Mon Sep 17 00:00:00 2001
From: Albert L. Chu <[EMAIL PROTECTED]>
Date: Thu, 20 Mar 2008 16:23:13 -0700
Subject: [PATCH] implement port_offsetting


Signed-off-by: Albert L. Chu <[EMAIL PROTECTED]>
---
 opensm/include/opensm/osm_switch.h |    7 +-
 opensm/opensm/osm_dump.c           |    3 +-
 opensm/opensm/osm_switch.c         |  246 ++++++++++++++++++++++++++++++++----
 opensm/opensm/osm_ucast_mgr.c      |   14 ++-
 4 files changed, 239 insertions(+), 31 deletions(-)

diff --git a/opensm/include/opensm/osm_switch.h b/opensm/include/opensm/osm_switch.h
index 2624d5f..45f4718 100644
--- a/opensm/include/opensm/osm_switch.h
+++ b/opensm/include/opensm/osm_switch.h
@@ -997,7 +997,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
 			  IN const boolean_t dor,
 			  IN OUT osm_switch_guid_count_t * remote_guids,
 			  IN OUT uint16_t * p_num_remote_guids,
-			  IN OUT osm_switch_guid_count_t ** p_remote_guid_count_used);
+			  IN OUT osm_switch_guid_count_t ** p_remote_guid_count_used,
+			  IN uint16_t port_offsetting_lids_per_port);
 /*
 * PARAMETERS
 *	p_sw
@@ -1031,6 +1032,10 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
 *		[in out] The specific osm_switch_guid_count_t used
 *		in switch recommendations.
 *
+*	port_offsetting_lids_per_port
+*		[in] If > 0, enables port offsetting and gives the
+*		lids_per_port to use; 0 disables port offsetting.
+*
 * RETURN VALUE
 *	Returns the recommended port on which to route this LID.
 *
diff --git a/opensm/opensm/osm_dump.c b/opensm/opensm/osm_dump.c
index 2bac75a..37842ac 100644
--- a/opensm/opensm/osm_dump.c
+++ b/opensm/opensm/osm_dump.c
@@ -219,7 +219,8 @@ static void dump_ucast_routes(cl_map_item_t *p_map_item, FILE *file, void *cxt)
 			/* No LMC Optimization */
 			best_port = osm_switch_recommend_path(p_sw, p_port,
 							      lid_ho, TRUE, dor,
-							      NULL, NULL, NULL);
+							      NULL, NULL,
+							      NULL, 0);
 			fprintf(file, "No %u hop path possible via port %u!",
 				best_hops, best_port);
 		}
diff --git a/opensm/opensm/osm_switch.c b/opensm/opensm/osm_switch.c
index f346b25..8e3342b 100644
--- a/opensm/opensm/osm_switch.c
+++ b/opensm/opensm/osm_switch.c
@@ -55,6 +55,15 @@
 #include <iba/ib_types.h>
 #include <opensm/osm_switch.h>
 
+/* Local structs: per-remote-destination bookkeeping used by port offsetting */
+struct osm_switch_remote_dest {
+	uint32_t total_paths;	/* sum of the path counts of every port recorded in ports[] */
+	uint32_t min_paths;	/* smallest path count seen among the ports in ports[] */
+	uint8_t ports[256]; /* port numbers recorded for this destination; 256 b/c max_ports is a uint8_t */
+	unsigned int ports_count;	/* number of valid entries in ports[] */
+	osm_switch_guid_count_t *p_remote_guid;	/* guid-count entry identifying this destination */
+};
+
 /**********************************************************************
  **********************************************************************/
 cl_status_t
@@ -304,9 +313,32 @@ osm_switch_find_guid_count(IN const osm_switch_t * const p_sw,
 					   1);
 }
 
+/**********************************************************************
+ **********************************************************************/
+
+/* greatest common divisor of a and b (Euclidean algorithm); returns a when b is 0 */
+static unsigned int
+_gcd(unsigned int a, unsigned int b)
+{
+	unsigned int t;
+	while (b != 0) {
+		t = b;
+		b = a % b;
+		a = t;
+	}
+	return a;
+}
+
+/* least common multiple; 0 if either input is 0; divides first so a*b cannot overflow early */
+static unsigned int
+_lcm(unsigned int a, unsigned int b)
+{
+	return (a == 0 || b == 0) ? 0 : ((a / _gcd(a,b)) * b);
+}
 
 /**********************************************************************
  **********************************************************************/
+
 uint8_t
 osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
 			  IN osm_port_t * p_port,
@@ -315,7 +347,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
 			  IN const boolean_t dor,
 			  IN OUT osm_switch_guid_count_t * remote_guids,
 			  IN OUT uint16_t * p_num_remote_guids,
-			  IN OUT osm_switch_guid_count_t ** p_remote_guid_count_used)
+			  IN OUT osm_switch_guid_count_t ** p_remote_guid_count_used,
+			  IN uint16_t port_offsetting_lids_per_port)
 {
 	/*
 	   We support an enhanced LMC aware routing mode:
@@ -356,6 +389,20 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
 	osm_node_t *p_rem_node;
 	osm_node_t *p_rem_node_first = NULL;
 	osm_switch_guid_count_t *p_remote_guid = NULL;
+	/*
+	   These vars track information for port offsetting.
+	 */
+	boolean_t port_offsetting = remote_guids && p_num_remote_guids
+		&& p_remote_guid_count_used && port_offsetting_lids_per_port;
+	struct osm_switch_remote_dest * remote_dests = NULL;
+	struct osm_switch_remote_dest * p_remote_dest = NULL;
+	uint32_t num_remote_dests = 0;
+	uint32_t total_paths_count = 0;
+	uint32_t num_potential_ports = 0;
+	uint32_t lcm = 0;
+	uint32_t indx = 0;
+	boolean_t dest_found = FALSE;
+	unsigned int i;
 
 	CL_ASSERT(lid_ho > 0);
 
@@ -378,9 +425,22 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
 
 	num_ports = p_sw->num_ports;
 
+	if (port_offsetting) {
+		remote_dests = malloc(sizeof(struct osm_switch_remote_dest) * num_ports);
+		if (remote_dests == NULL) {
+			osm_log(p_sw->p_log, OSM_LOG_ERROR,
+				"osm_switch_recommend_path: "
+				"Cannot allocate array. Insufficient memory: "
+				"Disabling port_offsetting\n");
+			port_offsetting = 0;
+		}
+	}
+
 	least_hops = osm_switch_get_least_hops(p_sw, base_lid);
-	if (least_hops == OSM_NO_PATH)
-		return (OSM_NO_PATH);
+	if (least_hops == OSM_NO_PATH) {
+		best_port = OSM_NO_PATH;
+		goto Exit;
+	}
 
 	/*
 	   First, inquire with the forwarding table for an existing
@@ -417,8 +477,10 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
 				   in the forwarding tables that he wants to be overridden by the
 				   minimum hop function.
 				 */
-				if (hops == least_hops)
-					return (port_num);
+				if (hops == least_hops) {
+					best_port = port_num;
+					goto Exit;
+				}
 			}
 		}
 	}
@@ -475,7 +537,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
 								       port_num);
 
 			/* If not update the least hops for this case */
-			if (!p_remote_guid) {
+			if (!p_remote_guid
+			    || !p_remote_guid->forwarded_to) {
 				if (check_count < least_paths_other_sys) {
 					least_paths_other_sys = check_count;
 					best_port_other_sys = port_num;
@@ -489,7 +552,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
 										port_num);
 
 				/* If not update the least hops for this case */
-				if (!p_remote_guid
+				if ((!p_remote_guid
+				     || !p_remote_guid->forwarded_to)
 				    && check_count < least_paths_other_nodes) {
 					least_paths_other_nodes = check_count;
 					best_port_other_node = port_num;
@@ -498,6 +562,50 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
 				/* else prior sys and node guid already used */
 
 			}	/* same sys found */
+
+			/* Store the new sys/node guid that we haven't seen yet */
+			if (!p_remote_guid) {
+				p_rem_physp = osm_physp_get_remote(p_physp);
+				p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
+				memcpy(&(remote_guids[*p_num_remote_guids].sys_guid),
+				       &(p_rem_node->node_info.sys_guid),
+				       sizeof(uint64_t));
+				memcpy(&(remote_guids[*p_num_remote_guids].node_guid),
+				       &(p_rem_node->node_info.node_guid),
+				       sizeof(uint64_t));
+				remote_guids[*p_num_remote_guids].forwarded_to = 0;
+				p_remote_guid = &remote_guids[*p_num_remote_guids];
+				(*p_num_remote_guids)++;
+			}
+		}
+
+		if (port_offsetting) {
+			/* Find (or create) the record for this port's remote destination and update its totals */
+			p_remote_dest = NULL;
+			for (i = 0; i < num_remote_dests; i++) {
+				if (!memcmp(p_remote_guid,
+					    (&remote_dests[i])->p_remote_guid,
+					    sizeof(*p_remote_guid))) {	/* compare one guid entry only, not the size of the dest record */
+					p_remote_dest = &remote_dests[i];
+					break;
+				}
+			}
+			if (!p_remote_dest) {
+				p_remote_dest = &remote_dests[num_remote_dests];
+				p_remote_dest->p_remote_guid = p_remote_guid;
+				p_remote_dest->total_paths = 0;
+				p_remote_dest->min_paths = 0xFFFFFFFF;	/* sentinel: any real count is smaller */
+				p_remote_dest->ports_count = 0;
+				num_remote_dests++;
+			}
+			p_remote_dest->total_paths += check_count;
+			if (check_count < p_remote_dest->min_paths)
+				p_remote_dest->min_paths = check_count;
+			p_remote_dest->ports[p_remote_dest->ports_count] = port_num;
+			p_remote_dest->ports_count++;
+
+			total_paths_count += check_count;	/* running totals across all candidate ports */
+			num_potential_ports++;
+		}
 
 		/* routing for LMC mode */
@@ -523,21 +631,23 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
 			best_port = port_num;
 			least_paths = check_count;
 			if (routing_for_lmc
-			    && p_remote_guid
+			    && p_remote_guid->forwarded_to
 			    && p_remote_guid->forwarded_to < least_forwarded_to)
 				least_forwarded_to = p_remote_guid->forwarded_to;
 		}
 		else if (routing_for_lmc
-			 && p_remote_guid
 			 && check_count == least_paths
+			 && p_remote_guid->forwarded_to
 			 && p_remote_guid->forwarded_to < least_forwarded_to) {
 			least_forwarded_to = p_remote_guid->forwarded_to;
 			best_port = port_num;
 		}
 	}
 
-	if (port_found == FALSE)
-		return (OSM_NO_PATH);
+	if (port_found == FALSE) {
+		best_port = OSM_NO_PATH;
+		goto Exit;
+	}
 
 	/*
 	   if we are in enhanced routing mode and the best port is not
@@ -555,24 +665,110 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
 							   remote_guids,
 							   p_num_remote_guids,
 							   best_port);
+		/* Must be stored */
+		CL_ASSERT(p_remote_guid);
+		*p_remote_guid_count_used = p_remote_guid;
+	}
 
-		if (!p_remote_guid) {
-			/* track the remote node and system of the port used. */
-			p_physp = osm_node_get_physp_ptr(p_sw->p_node, best_port);
-			p_rem_physp = osm_physp_get_remote(p_physp);
-			p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
-			memcpy(&(remote_guids[*p_num_remote_guids].sys_guid),
-			       &(p_rem_node->node_info.sys_guid),
-			       sizeof(uint64_t));
-			memcpy(&(remote_guids[*p_num_remote_guids].node_guid),
-				       &(p_rem_node->node_info.node_guid),
-			       sizeof(uint64_t));
-			remote_guids[*p_num_remote_guids].forwarded_to = 0;
-			(*p_num_remote_guids)++;
+	/*
+	 * As an example of what we're trying to do with port
+	 * offsetting, assume LMC = 2 and we are trying to route
+	 * the lids of 4 ports. The lids of these 4 ports are:
+	 *
+	 * (1,2,3,4)
+	 * (5,6,7,8)
+	 * (9,10,11,12)
+	 * (13,14,15,16)
+	 *
+	 * Suppose forwarding to all these lids goes through
+	 * 4 specific switch ports.  If we just cycle through
+	 * ports and lids in a normal iterative fashion, we would
+	 * normally forward out ports in this manner.
+	 *
+	 * switch port 1: 1, 5, 9, 13
+	 * switch port 2: 2, 6, 10, 14
+	 * switch port 3: 3, 7, 11, 15
+	 * switch port 4: 4, 8, 12, 16
+	 *
+	 * Note that the base lid of each port (lids 1, 5, 9, 13)
+	 * are all routed out of switch port 1.  Thus, if the user
+	 * only uses the base lid of each port, they will get pretty
+	 * bad performance.  We will try to get this layout instead.
+	 *
+	 * switch port 1: 1, 8, 11, 14
+	 * switch port 2: 2, 5, 12, 15
+	 * switch port 3: 3, 6, 9,  16
+	 * switch port 4: 4, 7, 10, 13
+	 *
+	 * where switch ports are distributed in a more even manner.
+	 * The base lid of each port is now distributed evenly
+	 * across all 4 switch ports.  The remaining lids are still
+	 * distributed evenly across all the remaining switch ports.
+	 *
+	 * In order to accomplish this, we (effectively) will iterate
+	 * through all ports like before, but instead of iterating from
+	 * 0 to N-1 all the time, we will select the starting index
+	 * based on the number of paths we have routed thus far.
+	 */
+
+	/* We will not do port offsetting if num_potential_ports == 1
+	 * b/c there is no offsetting that can be done.
+	 */
+	if (port_offsetting
+	    && best_port
+	    && num_potential_ports > 1) {
+		/* Select which destination we want to forward to with our
+		 * offsetting loop.
+		 */
+		lcm = _lcm(port_offsetting_lids_per_port,
+			   num_remote_dests);
+		indx = (total_paths_count / lcm) % num_remote_dests;
+		for (i = 0; i < num_remote_dests; i++) {
+			p_remote_dest = &remote_dests[indx];
+			p_remote_guid = p_remote_dest->p_remote_guid;
+
+			if (p_remote_guid->forwarded_to == least_forwarded_to
+			    && p_remote_dest->min_paths == least_paths) {
+				dest_found = TRUE;
+				break;
+			}
+
+			indx++;
+			if (indx >= num_remote_dests)
+				indx = 0;
+		}
+
+		/* Then we "offset" within the potential ports we could
+		 * forward out of for this specific destination.
+		 */
+		if (dest_found && p_remote_dest) {
+			lcm = _lcm(port_offsetting_lids_per_port,
+				   p_remote_dest->ports_count);
+			indx = (p_remote_dest->total_paths / lcm) % p_remote_dest->ports_count;
+			for (i = 0; i < p_remote_dest->ports_count; i++) {
+				port_num = p_remote_dest->ports[indx];
+				check_count =
+					osm_port_prof_path_count_get(&p_sw->p_prof[port_num]);
+				if (check_count == least_paths) {
+					if (best_port != port_num)
+						osm_log(p_sw->p_log, OSM_LOG_DEBUG,
+							"osm_switch_recommend_path: "
+							"best port offsetted: %d to %d\n",
+							best_port, port_num);
+					best_port = port_num;
+					*p_remote_guid_count_used = p_remote_dest->p_remote_guid;
+					break;
+				}
+				indx++;
+				if (indx >= p_remote_dest->ports_count)
+					indx = 0;
+			}
 		}
-		*p_remote_guid_count_used = p_remote_guid;
 	}
 
+Exit:
+	if (remote_dests)
+		free(remote_dests);
 	return (best_port);
 }
 
diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c
index 938db84..501c2c7 100644
--- a/opensm/opensm/osm_ucast_mgr.c
+++ b/opensm/opensm/osm_ucast_mgr.c
@@ -212,18 +212,23 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
 	osm_switch_guid_count_t *remote_guids = NULL;
 	uint16_t num_used_guids = 0;
 	osm_switch_guid_count_t *p_remote_guid_used = NULL;
+	uint16_t port_offsetting_lids_per_port = 0;
 
 	OSM_LOG_ENTER(p_mgr->p_log);
 
 	if (lids_per_port > 1) {
-		remote_guids = malloc(sizeof(osm_switch_guid_count_t) * lids_per_port);
+		uint8_t num_ports = p_sw->num_ports;
+		remote_guids = malloc(sizeof(osm_switch_guid_count_t) * num_ports);
 		if (remote_guids == NULL) {
 			osm_log(p_mgr->p_log, OSM_LOG_ERROR,
 				"__osm_ucast_mgr_process_port: ERR 3A09: "
 				"Cannot allocate array. Insufficient memory\n");
 			goto Exit;
 		}
-		memset(remote_guids, 0, sizeof(osm_switch_guid_count_t) * lids_per_port);
+		memset(remote_guids, 0, sizeof(osm_switch_guid_count_t) * num_ports);
+
+		if (p_mgr->p_subn->opt.port_offsetting)
+			port_offsetting_lids_per_port = lids_per_port;
 	}
 
 	osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
@@ -270,14 +275,15 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
 							 p_mgr->is_dor,
 							 remote_guids,
 							 &num_used_guids,
-							 &p_remote_guid_used);
+							 &p_remote_guid_used,
+							 port_offsetting_lids_per_port);
 		}
 		else
 			port = osm_switch_recommend_path(p_sw, p_port, lid_ho,
 							 p_mgr->p_subn->
 							 ignore_existing_lfts,
 							 p_mgr->is_dor,
-							 NULL, NULL, NULL);
+							 NULL, NULL, NULL, 0);
 
 		/*
 		   There might be no path to the target
-- 
1.5.1

_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to