Hey Sasha,

I noticed that when a routing algorithm failed and defaulted back to
'minhop', the logs and the console did not report this change.  This is
because most of that code outputs the routing algorithm name that was
stored during configuration/setup.  The name isn't adjusted depending on
the routing algorithm's success/failure.

There are several ways this could be fixed.  I decided to easiest was to
stick a new routed_name field + lock into struct osm_routing_engine, and
set/use this new field respectively.

Note that within osm_ucast_mgr_process(), there is a slight logic change
from what was there before.  If the routing engine's call to
build_lid_matrices() failed, I've changed the logic to not call the
routing engine's ucast_build_fwd_tables() function.  This felt like the
correct logic and seems to be fine given all the routing algorithms in
OpenSM.  PLMK if there is some behavior subtlety I missed.

Thanks,
Al
-- 
Albert Chu
[EMAIL PROTECTED]
925-422-5311
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
>From d83332b4c6cb1cdb0fc960808f9fa53615f61201 Mon Sep 17 00:00:00 2001
From: Albert L. Chu <[EMAIL PROTECTED]>
Date: Fri, 7 Dec 2007 13:44:04 -0800
Subject: [PATCH] fix incorrect reporting of routing engine


Signed-off-by: Albert L. Chu <[EMAIL PROTECTED]>
---
 opensm/include/opensm/osm_opensm.h |    9 +++++++
 opensm/opensm/osm_console.c        |    6 +++-
 opensm/opensm/osm_opensm.c         |    6 +++++
 opensm/opensm/osm_ucast_mgr.c      |   43 ++++++++++++++++++++++++++----------
 4 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/opensm/include/opensm/osm_opensm.h b/opensm/include/opensm/osm_opensm.h
index 1b5edb8..3fc70fd 100644
--- a/opensm/include/opensm/osm_opensm.h
+++ b/opensm/include/opensm/osm_opensm.h
@@ -107,6 +107,8 @@ struct osm_routing_engine {
 	int (*ucast_build_fwd_tables) (void *context);
 	void (*ucast_dump_tables) (void *context);
 	void (*delete) (void *context);
+	const char *routed_name;
+	cl_plock_t routed_name_lock;
 };
 /*
 * FIELDS
@@ -129,6 +131,13 @@ struct osm_routing_engine {
 *	delete
 *		The delete method, may be used for routing engine
 *		internals cleanup.
+*
+*	routed_name
+*		The routing engine name used for routing (for example,
+*		the specified one failed and we used the default)
+*
+*	routed_name_lock
+*		Shared lock guarding reads and writes to routed_name.
 */
 
 typedef struct _osm_console_t {
diff --git a/opensm/opensm/osm_console.c b/opensm/opensm/osm_console.c
index f669240..d8d16db 100644
--- a/opensm/opensm/osm_console.c
+++ b/opensm/opensm/osm_console.c
@@ -380,9 +380,11 @@ static void print_status(osm_opensm_t * p_osm, FILE * out)
 			sm_state_mgr_str(p_osm->sm.state_mgr.state));
 		fprintf(out, "   SA State           : %s\n",
 			sa_state_str(p_osm->sa.state));
+                cl_plock_acquire(&p_osm->routing_engine.routed_name_lock);
 		fprintf(out, "   Routing Engine     : %s\n",
-			p_osm->routing_engine.name ? p_osm->routing_engine.
-			name : "null (minhop)");
+			p_osm->routing_engine.routed_name ? 
+			p_osm->routing_engine.routed_name : "null (minhop)");
+                cl_plock_release(&p_osm->routing_engine.routed_name_lock);
 #ifdef ENABLE_OSM_PERF_MGR
 		fprintf(out, "\n   PerfMgr state/sweep state : %s/%s\n",
 			osm_perfmgr_get_state_str(&(p_osm->perfmgr)),
diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
index 26b9969..b55a638 100644
--- a/opensm/opensm/osm_opensm.c
+++ b/opensm/opensm/osm_opensm.c
@@ -188,6 +188,8 @@ void osm_opensm_destroy(IN osm_opensm_t * const p_osm)
 
 	cl_plock_destroy(&p_osm->lock);
 
+	cl_plock_destroy(&p_osm->routing_engine.routed_name_lock);
+
 	osm_log_destroy(&p_osm->log);
 }
 
@@ -224,6 +226,10 @@ osm_opensm_init(IN osm_opensm_t * const p_osm,
 	if (status != IB_SUCCESS)
 		goto Exit;
 
+	status = cl_plock_init(&p_osm->routing_engine.routed_name_lock);
+	if (status != IB_SUCCESS)
+		goto Exit;
+
 	if (p_opt->single_thread) {
 		osm_log(&p_osm->log, OSM_LOG_INFO,
 			"osm_opensm_init: Forcing single threaded dispatcher\n");
diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c
index 8bb4739..fe15666 100644
--- a/opensm/opensm/osm_ucast_mgr.c
+++ b/opensm/opensm/osm_ucast_mgr.c
@@ -769,6 +769,9 @@ osm_signal_t osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr)
 	struct osm_routing_engine *p_routing_eng;
 	osm_signal_t signal = OSM_SIGNAL_DONE;
 	cl_qmap_t *p_sw_guid_tbl;
+        const char *routed_name = NULL;
+        int blm = 0;
+        int ubft = 0;
 
 	OSM_LOG_ENTER(p_mgr->p_log, osm_ucast_mgr_process);
 
@@ -789,23 +792,39 @@ osm_signal_t osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr)
 
 	p_mgr->any_change = FALSE;
 
-	if (!p_routing_eng->build_lid_matrices ||
-	    p_routing_eng->build_lid_matrices(p_routing_eng->context) != 0)
-		osm_ucast_mgr_build_lid_matrices(p_mgr);
-
-	osm_log(p_mgr->p_log, OSM_LOG_INFO,
-		"osm_ucast_mgr_process: "
-		"%s tables configured on all switches\n",
-		p_routing_eng->name ? p_routing_eng->name : "null (minhop)");
+	if (p_routing_eng->build_lid_matrices) {
+            blm = p_routing_eng->build_lid_matrices(p_routing_eng->context);
+            if (blm)
+                osm_ucast_mgr_build_lid_matrices(p_mgr);
+        }
+        else
+            osm_ucast_mgr_build_lid_matrices(p_mgr);
 
 	/*
 	   Now that the lid matrices have been built, we can
 	   build and download the switch forwarding tables.
 	 */
-	if (!p_routing_eng->ucast_build_fwd_tables ||
-	    p_routing_eng->ucast_build_fwd_tables(p_routing_eng->context))
-		cl_qmap_apply_func(p_sw_guid_tbl, __osm_ucast_mgr_process_tbl,
-				   p_mgr);
+	if (!blm && p_routing_eng->ucast_build_fwd_tables) {
+            ubft = p_routing_eng->ucast_build_fwd_tables(p_routing_eng->context);
+            if (ubft)
+                cl_qmap_apply_func(p_sw_guid_tbl, __osm_ucast_mgr_process_tbl,
+                                   p_mgr);
+        }
+        else
+            cl_qmap_apply_func(p_sw_guid_tbl, __osm_ucast_mgr_process_tbl,
+                               p_mgr);
+
+        if (!blm && !ubft)
+            routed_name = p_routing_eng->name;
+
+        CL_PLOCK_EXCL_ACQUIRE(&p_routing_eng->routed_name_lock);
+        p_routing_eng->routed_name = routed_name;
+        CL_PLOCK_RELEASE(&p_routing_eng->routed_name_lock);
+
+	osm_log(p_mgr->p_log, OSM_LOG_INFO,
+		"osm_ucast_mgr_process: "
+		"%s tables configured on all switches\n",
+		routed_name ? routed_name : "null (minhop)");
 
 	if (p_mgr->any_change) {
 		signal = OSM_SIGNAL_DONE_PENDING;
-- 
1.5.1

_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to