[ofa-general] [PATCH] opensm/osm_pkey.c: cosmetics in some log message

2008-11-10 Thread Yevgeny Kliteynik
Hi Sasha,

Just some cosmetics in a log message.

Signed-off-by: Yevgeny Kliteynik [EMAIL PROTECTED]
---
 opensm/opensm/osm_pkey.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/opensm/opensm/osm_pkey.c b/opensm/opensm/osm_pkey.c
index 3adc8d7..e09faa8 100644
--- a/opensm/opensm/osm_pkey.c
+++ b/opensm/opensm/osm_pkey.c
@@ -475,7 +475,7 @@ osm_physp_has_pkey(IN osm_log_t * p_log,
OSM_LOG_ENTER(p_log);

OSM_LOG(p_log, OSM_LOG_DEBUG,
-   Search for PKey: 0x%4x\n, cl_ntoh16(pkey));
+   Search for PKey: 0x%04x\n, cl_ntoh16(pkey));

/* if the pkey given is an invalid pkey - return TRUE. */
if (ib_pkey_is_invalid(pkey)) {
-- 
1.5.1.4

___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] [PATCH] opensm/ib_types.h: rename IB_MC_REC_STATE_SEND_ONLY_MEMBER

2008-11-10 Thread Yevgeny Kliteynik
Sasha,

The multicast Send Only bit is defined in the spec as SendOnlyNonMember,
to denote that the port is not considered a member for purposes of group
creation/deletion.

Renaming IB_MC_REC_STATE_SEND_ONLY_MEMBER to 
IB_MC_REC_STATE_SEND_ONLY_NON_MEMBER.

Signed-off-by: Yevgeny Kliteynik [EMAIL PROTECTED]
---
 opensm/include/iba/ib_types.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/opensm/include/iba/ib_types.h b/opensm/include/iba/ib_types.h
index 6412ea9..0f9d110 100644
--- a/opensm/include/iba/ib_types.h
+++ b/opensm/include/iba/ib_types.h
@@ -7085,7 +7085,7 @@ ib_member_set_join_state(IN OUT ib_member_rec_t * 
p_mc_rec,
  */
 #define IB_MC_REC_STATE_FULL_MEMBER 0x01
 #define IB_MC_REC_STATE_NON_MEMBER 0x02
-#define IB_MC_REC_STATE_SEND_ONLY_MEMBER 0x04
+#define IB_MC_REC_STATE_SEND_ONLY_NON_MEMBER 0x04

 /*
  * Generic MAD notice types
-- 
1.5.1.4

___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] [PATCH] opensm/osm_multicast.c: bug with joining/leaving mcast group

2008-11-10 Thread Yevgeny Kliteynik
Hi Sasha,

I think there's a bug in the osm_mgrp_add/remove_port functions.
If some mcast group member has JoinState 0x1 (full member),
and then a new join from the same port is received with JoinState
0x2 (non member), OpenSM will reduce the number of full members
of this group, which eventually might cause group deletion.
Similar problem (only in logically opposite direction) happens
when port tries to partially leave mcast group.

This patch should fix it.

Signed-off-by: Yevgeny Kliteynik [EMAIL PROTECTED]
---
 opensm/opensm/osm_multicast.c |   33 +++--
 1 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/opensm/opensm/osm_multicast.c b/opensm/opensm/osm_multicast.c
index d62d585..350fd22 100644
--- a/opensm/opensm/osm_multicast.c
+++ b/opensm/opensm/osm_multicast.c
@@ -172,17 +172,11 @@ osm_mcm_port_t *osm_mgrp_add_port(IN osm_subn_t *subn, 
osm_log_t *log,
p_mgrp-last_change_id++;
}

-   if ((join_state ^ prev_join_state)  IB_JOIN_STATE_FULL) {
-   if (join_state  IB_JOIN_STATE_FULL) {
-   if (++p_mgrp-full_members == 1) {
-   mgrp_send_notice(subn, log, p_mgrp, 66);
-   p_mgrp-to_be_deleted = 0;
-   }
-   } else if (--p_mgrp-full_members == 0) {
-   mgrp_send_notice(subn, log, p_mgrp, 67);
-   if (!p_mgrp-well_known)
-   p_mgrp-to_be_deleted = 1;
-   }
+   if ((join_state  IB_JOIN_STATE_FULL) 
+   !(prev_join_state  IB_JOIN_STATE_FULL) 
+   (++p_mgrp-full_members == 1)) {
+   mgrp_send_notice(subn, log, p_mgrp, 66);
+   p_mgrp-to_be_deleted = 0;
}

return (p_mcm_port);
@@ -224,17 +218,12 @@ int osm_mgrp_remove_port(osm_subn_t *subn, osm_log_t 
*log, osm_mgrp_t *mgrp,

/* no more full members so the group will be deleted after re-route
   but only if it is not a well known group */
-   if ((port_join_state ^ new_join_state)  IB_JOIN_STATE_FULL) {
-   if (port_join_state  IB_JOIN_STATE_FULL) {
-   if (--mgrp-full_members == 0) {
-   mgrp_send_notice(subn, log, mgrp, 67);
-   if (!mgrp-well_known)
-   mgrp-to_be_deleted = 1;
-   }
-   } else if (++mgrp-full_members == 1) {
-   mgrp_send_notice(subn, log, mgrp, 66);
-   mgrp-to_be_deleted = 0;
-   }
+   if ((port_join_state  IB_JOIN_STATE_FULL) 
+   !(new_join_state  IB_JOIN_STATE_FULL) 
+   (--mgrp-full_members == 0)) {
+   mgrp_send_notice(subn, log, mgrp, 67);
+   if (!mgrp-well_known)
+   mgrp-to_be_deleted = 1;
}

return ret;
-- 
1.5.1.4

___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] Agenda for OFED meeting today - Nov 10

2008-11-10 Thread Tziporet Koren
This is the agenda for OFED meeting today on OFED release status:

1. Decide on RC4 release - I suggest to do it tomorrow
2. Decide on GA release:
my suggestion - RC5 in a week (Monday 17, Nov)
GA - Nov 24 (we cannot delay further that week because of the
Thanksgiving holiday)
We can try on Friday Nov 21
3. Release notes - all owners must update the release notes
4. Bugs review:

1323blo [EMAIL PROTECTED]   REOP
IB/ehca: possibillity of kernel panic under certain circu...
1370blo [EMAIL PROTECTED]   NEW Ping
over IPoIB I/F fails after ifconfig down and up
1364cri [EMAIL PROTECTED]   NEW system
hang on rmmod cxgb3 in rhel4.7
1365cri [EMAIL PROTECTED]   NEW Panic on
loading iw_cxgb3 in RHEL 4.6
1366cri [EMAIL PROTECTED]   NEW Panic
during boot-up after an OFED install in RHEL 4.5
1242cri [EMAIL PROTECTED]   NEW kernel
panic while running mpi2007 against ofed1.4 -- ib_...
1289maj [EMAIL PROTECTED]   NEW Ib and
ipoib doesnt respond while running multiple tests ...
1349maj [EMAIL PROTECTED]   NEW Kernel
panic on sdp
1336maj [EMAIL PROTECTED]   NEW Can't to
unloading the mlx4_ib module on ppc64
1358maj [EMAIL PROTECTED]   ASSIfmr_test
causes eth0 transmit timeout - should be fixed
1359maj [EMAIL PROTECTED]   NEW Kernel
panic while running Ltp - ongoing

Tziporet  Vlad
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] ofa_1_4_kernel 20081110-0200 daily build status

2008-11-10 Thread Vladimir Sokolovsky (Mellanox)
This email was generated automatically, please do not reply


git_url: git://git.openfabrics.org/ofed_1_4/linux-2.6.git
git_branch: ofed_kernel

Common build parameters: 

Passed:
Passed on i686 with linux-2.6.16
Passed on i686 with linux-2.6.18
Passed on i686 with linux-2.6.17
Passed on i686 with linux-2.6.19
Passed on i686 with linux-2.6.21.1
Passed on i686 with linux-2.6.22
Passed on i686 with linux-2.6.24
Passed on i686 with linux-2.6.26
Passed on i686 with linux-2.6.27
Passed on x86_64 with linux-2.6.16
Passed on x86_64 with linux-2.6.16.43-0.3-smp
Passed on x86_64 with linux-2.6.16.21-0.8-smp
Passed on x86_64 with linux-2.6.18
Passed on x86_64 with linux-2.6.17
Passed on x86_64 with linux-2.6.16.60-0.21-smp
Passed on x86_64 with linux-2.6.18-8.el5
Passed on x86_64 with linux-2.6.18-1.2798.fc6
Passed on x86_64 with linux-2.6.18-53.el5
Passed on x86_64 with linux-2.6.19
Passed on x86_64 with linux-2.6.20
Passed on x86_64 with linux-2.6.18-93.el5
Passed on x86_64 with linux-2.6.22
Passed on x86_64 with linux-2.6.21.1
Passed on x86_64 with linux-2.6.22.5-31-default
Passed on x86_64 with linux-2.6.25
Passed on x86_64 with linux-2.6.24
Passed on x86_64 with linux-2.6.26
Passed on x86_64 with linux-2.6.9-42.ELsmp
Passed on x86_64 with linux-2.6.9-55.ELsmp
Passed on x86_64 with linux-2.6.27
Passed on x86_64 with linux-2.6.9-67.ELsmp
Passed on x86_64 with linux-2.6.9-78.ELsmp
Passed on ia64 with linux-2.6.17
Passed on ia64 with linux-2.6.16
Passed on ia64 with linux-2.6.16.21-0.8-default
Passed on ia64 with linux-2.6.21.1
Passed on ia64 with linux-2.6.19
Passed on ia64 with linux-2.6.18
Passed on ia64 with linux-2.6.23
Passed on ia64 with linux-2.6.22
Passed on ia64 with linux-2.6.24
Passed on ia64 with linux-2.6.25
Passed on ia64 with linux-2.6.26
Passed on ppc64 with linux-2.6.16
Passed on ppc64 with linux-2.6.17
Passed on ppc64 with linux-2.6.19
Passed on ppc64 with linux-2.6.18
Passed on ppc64 with linux-2.6.18-8.el5

Failed:
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


RE: [ofa-general] NFS-RDMA (OFED1.4) with standard distributions ?

2008-11-10 Thread Ciesielski, Frederic (EMEA HPCOSLO CC)
That's great, thanks.

I ran some tests with the 2.6.27 kernel as server and client, and basically it 
works fine.

I could not find yet any situation where NFS-RDMA would outperform NFS/IPoIB, 
at least when you compare apples to apples (same clients, same server, same 
protocol, and not just write to/read from the caches), and it even seems to 
have severe performance issues for reading with files larger than the memory 
size of the client and the server.
Hopefully this will improve when more users will be able to give valuable 
feedback...

Fred.

-Original Message-
From: Jeff Becker [mailto:[EMAIL PROTECTED]
Sent: Saturday, 08 November, 2008 22:35
To: Ciesielski, Frederic (EMEA HPCOSLO CC)
Cc: general@lists.openfabrics.org
Subject: Re: [ofa-general] NFS-RDMA (OFED1.4) with standard distributions ?

Ciesielski, Frederic (EMEA HPCOSLO CC) wrote:
 Is there any chance that the new NFS-RDMA features coming with OFED
 1.4 work with standard and current distributions, like RHEL5, SLES10 ?
Not yet, but I'm working on it. I intend for NFSRDMA to work on 2.6.27 and 
2.6.26 for OFED 1.4. The RHEL5 and SLES10 backports will likely be done for 
OFED 1.4.1. Thanks.

-jeff

 Did anybody test this, or would pretend it is supposed to work ?

 I mean without building a 2.6.27 or equivalent kernel on top of it,
 keeping almost full support from the vendors.

 Enhanced kernel modules may not be sufficient to work around the
 limitations of old kernels...



 --
 --

 ___
 general mailing list
 general@lists.openfabrics.org
 http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

 To unsubscribe, please visit
 http://openib.org/mailman/listinfo/openib-general

___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


Re: [Fwd: RE: [ofa-general] NFS-RDMA (OFED1.4) with standard distributions ?]

2008-11-10 Thread Tom Tucker

Jeff:

Unfortunately, the NFSRDMA transport cannot make your disks go faster. 
If the storage subsystem is incapable of keeping up with IPoIB, then it 
won't be able to keep up with NFSRDMA either.


To compare NFSRDMA and IPoIB performance absent a very fast storage 
subsystem you'll need to keep the file sizes small enough such that they 
fit within the server cache.


Tom


Jeff Becker wrote:

Hi. Just passing this on in case you missed it. Do you have any advice
on what knobs to tweak to get better performance (than NFS/IPoIB)? Thanks.

-jeff

 Original Message 
Subject:RE: [ofa-general] NFS-RDMA (OFED1.4) with standard
distributions ?
Date:   Mon, 10 Nov 2008 16:27:50 +
From:   Ciesielski, Frederic (EMEA HPCOSLO CC) [EMAIL PROTECTED]
To: Jeff Becker [EMAIL PROTECTED]
CC: general@lists.openfabrics.org general@lists.openfabrics.org
References:
[EMAIL PROTECTED]
[EMAIL PROTECTED]



That's great, thanks.

I ran some tests with the 2.6.27 kernel as server and client, and basically it 
works fine.

I could not find yet any situation where NFS-RDMA would outperform NFS/IPoIB, 
at least when you compare apples to apples (same clients, same server, same 
protocol, and not just write to/read from the caches), and it even seems to 
have severe performance issues for reading with files larger than the memory 
size of the client and the server.
Hopefully this will improve when more users will be able to give valuable 
feedback...

Fred.

-Original Message-
From: Jeff Becker [mailto:[EMAIL PROTECTED]
Sent: Saturday, 08 November, 2008 22:35
To: Ciesielski, Frederic (EMEA HPCOSLO CC)
Cc: general@lists.openfabrics.org
Subject: Re: [ofa-general] NFS-RDMA (OFED1.4) with standard distributions ?

Ciesielski, Frederic (EMEA HPCOSLO CC) wrote:

Is there any chance that the new NFS-RDMA features coming with OFED
1.4 work with standard and current distributions, like RHEL5, SLES10 ?

Not yet, but I'm working on it. I intend for NFSRDMA to work on 2.6.27 and 
2.6.26 for OFED 1.4. The RHEL5 and SLES10 backports will likely be done for 
OFED 1.4.1. Thanks.

-jeff


Did anybody test this, or would pretend it is supposed to work ?

I mean without building a 2.6.27 or equivalent kernel on top of it,
keeping almost full support from the vendors.

Enhanced kernel modules may not be sufficient to work around the
limitations of old kernels...



--
--

___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit
http://openib.org/mailman/listinfo/openib-general




___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] Re: [ewg] Agenda for OFED meeting today - Nov 10

2008-11-10 Thread Steve Wise



1364cri [EMAIL PROTECTED]   NEW system
hang on rmmod cxgb3 in rhel4.7
1365cri [EMAIL PROTECTED]   NEW Panic on
loading iw_cxgb3 in RHEL 4.6
1366cri [EMAIL PROTECTED]   NEW Panic
during boot-up after an OFED install in RHEL 4.5
  


Sorry I missed the call (yet again). 


1364 is under investigation, should have a fix today.
1365 closed.  Didn't see the problem in latest daily build
1366 will need a fix and hopefully I'll have something today/tomorrow.  
This isn't related to just RH4.5, but rather to new chelsio boards that 
aren't supported in ofed-1.4.


These can all wait for -rc5 if you don't want to hold up rc4.

Thanx,

Steve.

___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] Re: [opensm patch] support dump_conf command in opensm console

2008-11-10 Thread Al Chu
Hey Sasha,

On Sun, 2008-11-09 at 19:25 +0200, Sasha Khapyorsky wrote:
 Hi Al,
 
 On 16:39 Mon 03 Nov , Al Chu wrote:
  Hey Sasha,
  
  When config files are rescanned and loaded, there's no way to know if
  the right configuration was actually reloaded or not.  A console command
  to dump the current config is a useful way to verify the loading of new
  configs or not.
  
  This patch assumes the fixes from my fix qos config parsing bugs is
  accepted.
 
 Didn't pass over it, sorry about delay.
 
  
  Al
  
  -- 
  Albert Chu
  [EMAIL PROTECTED]
  Computer Scientist
  High Performance Systems Division
  Lawrence Livermore National Laboratory
 
  From 249607e47ec7ef1b92f9578cece90460418d12b8 Mon Sep 17 00:00:00 2001
  From: Albert Chu [EMAIL PROTECTED]
  Date: Mon, 3 Nov 2008 16:22:29 -0800
  Subject: [PATCH] support dump_conf console command
  
  
  Signed-off-by: Albert Chu [EMAIL PROTECTED]
  ---
   opensm/opensm/osm_console.c |  158 
  +++
   1 files changed, 158 insertions(+), 0 deletions(-)
  
  diff --git a/opensm/opensm/osm_console.c b/opensm/opensm/osm_console.c
  index d9bbbc2..8422655 100644
  --- a/opensm/opensm/osm_console.c
  +++ b/opensm/opensm/osm_console.c
  @@ -53,6 +53,10 @@
   #include complib/cl_passivelock.h
   #include opensm/osm_perfmgr.h
   
  +#define NULL_STR (null)
  +
  +#define BOOLEAN_STR(__b) ((__b) ? TRUE : FALSE)
  +
   struct command {
  char *name;
  void (*help_function) (FILE * out, int detail);
  @@ -189,6 +193,14 @@ static void help_lidbalance(FILE * out, int detail)
  }
   }
   
  +static void help_dump_conf(FILE *out, int detail)
  +{
  +   fprintf(out, dump_conf\n);
  +   if (detail) {
  +   fprintf(out, dump current opensm configuration\n);
  +   }
  +}
  +
   #ifdef ENABLE_OSM_PERF_MGR
   static void help_perfmgr(FILE * out, int detail)
   {
  @@ -1136,6 +1148,151 @@ static void perfmgr_parse(char **p_last, 
  osm_opensm_t * p_osm, FILE * out)
   }
   #endif /* ENABLE_OSM_PERF_MGR */
   
  +static void dump_qos_options(osm_qos_options_t * opt,
  +osm_qos_options_t * dflt, 
  +char *prefix,
  +FILE * out)
  +{
  +   fprintf(out, %s_max_vls : %u\n,
  +   prefix, opt-max_vls ? opt-max_vls : dflt-max_vls);
  +   fprintf(out, %s_high_limit : %u\n,
  +   prefix, opt-high_limit = 0 ? (unsigned)opt-high_limit : 
  (unsigned)dflt-high_limit);
  +   fprintf(out, %s_vlarb_high : %s\n,
  +   prefix, opt-vlarb_high ? opt-vlarb_high : dflt-vlarb_high);
  +   fprintf(out, %s_vlarb_low : %s\n,
  +   prefix, opt-vlarb_low ? opt-vlarb_low : dflt-vlarb_low);
  +   fprintf(out, %s_sl2vl : %s\n,
  +   prefix, opt-sl2vl ? opt-sl2vl : dflt-sl2vl);
  +}
  +
  +static void dump_conf_parse(char **p_last, osm_opensm_t * p_osm, FILE * 
  out)
  +{
 
 Why not use the osm_subn_write_conf_file() function (wrapped by
 dump_conf_parse())? I think we need to have the config dumping code
 consolidated.

I had thought of that, but I didn't want all of the instructions and all
the extra lines of output.  But I guess it's not that big of a deal in
the end.  I'll send a new patch.

Al

 Sasha
 
  +   osm_subn_opt_t * opt = p_osm-subn.opt;
  +
  +   fprintf(out, config_file : %s\n, 
  +   opt-config_file ? opt-config_file : NULL_STR);
  +   fprintf(out, guid : 0x%016 PRIx64 \n, opt-guid);
  +   fprintf(out, m_key : 0x%016 PRIx64 \n, opt-m_key);
  +   fprintf(out, sm_key : 0x%016 PRIx64 \n, opt-sm_key);
  +   fprintf(out, sa_key : 0x%016 PRIx64 \n, opt-sa_key);
  +   fprintf(out, subnet_prefix : 0x%016 PRIx64 \n, opt-subnet_prefix);
  +   fprintf(out, m_key_lease_period : %u\n, opt-m_key_lease_period);
  +   fprintf(out, sweep_interval : %u\n, opt-sweep_interval);
  +   fprintf(out, max_wire_smps : %u\n, opt-max_wire_smps);
  +   fprintf(out, transaction_timeout : %u\n, opt-transaction_timeout);
  +   fprintf(out, sm_priority : %u\n, opt-sm_priority);
  +   fprintf(out, lmc : %u\n, opt-lmc);
  +   fprintf(out, lmc_esp0 : %s\n, 
  +   BOOLEAN_STR(opt-lmc_esp0));
  +   fprintf(out, max_op_vls : %u\n, opt-max_op_vls);
  +   fprintf(out, force_link_speed : %u\n, opt-force_link_speed);
  +   fprintf(out, reassign_lids : %s\n, 
  +   BOOLEAN_STR(opt-reassign_lids));
  +   fprintf(out, ignore_other_sm : %s\n, 
  +   BOOLEAN_STR(opt-ignore_other_sm));
  +   fprintf(out, single_thread : %s\n, 
  +   BOOLEAN_STR(opt-single_thread));
  +   fprintf(out, disable_multicast : %s\n, 
  +   BOOLEAN_STR(opt-disable_multicast));
  +   fprintf(out, force_log_flush : %s\n, 
  +   BOOLEAN_STR(opt-force_log_flush));
  +   fprintf(out, subnet_timeout : %u\n, opt-subnet_timeout);
  +   fprintf(out, packet_life_time : %u\n, opt-packet_life_time);
  +   fprintf(out, vl_stall_count : %u\n, opt-vl_stall_count);
  +   fprintf(out, leaf_vl_stall_count : %u\n, 

[ofa-general] [PATCH] opensm: osm_opensm.c added a method to remove plugins

2008-11-10 Thread Timothy A. Meier
Sasha,

During development, I am constantly bringing the SM up and down, so this helps 
make sure things
shut down gracefully.

Should have no impact, if people are not using plugins... yet.

From e0434e676d0b3dd63a323218d207f029da9e27a4 Mon Sep 17 00:00:00 2001
From: Tim Meier [EMAIL PROTECTED]
Date: Mon, 10 Nov 2008 09:48:55 -0800
Subject: [PATCH] opensm:  osm_opensm.c added a method to remove plugins

Upon shutdown, iterates through the plugins and releases
resources and removes them via their destroy() method.

Signed-off-by: Tim Meier [EMAIL PROTECTED]
---
 opensm/opensm/osm_opensm.c |   14 ++
 1 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
index 7deea6d..7286782 100644
--- a/opensm/opensm/osm_opensm.c
+++ b/opensm/opensm/osm_opensm.c
@@ -238,6 +238,19 @@ static void destroy_routing_engines(osm_opensm_t *osm)
}
 }

+/**
+ **/
+static void destroy_plugins(osm_opensm_t *osm)
+{
+   osm_epi_plugin_t *p;
+   // remove from the list, and destroy it
+   while (!cl_is_qlist_empty(osm-plugin_list)){
+   p = (osm_epi_plugin_t *)cl_qlist_remove_head(osm-plugin_list);
+   // plugin is responsible for freeing its own resources
+   osm_epi_destroy(p);
+   }
+}
+
 void osm_opensm_destroy(IN osm_opensm_t * const p_osm)
 {
/* in case of shutdown through exit proc - no ^C */
@@ -275,6 +288,7 @@ void osm_opensm_destroy(IN osm_opensm_t * const p_osm)
osm_sa_db_file_dump(p_osm);

/* do the destruction in reverse order as init */
+   destroy_plugins(p_osm);
destroy_routing_engines(p_osm);
osm_sa_destroy(p_osm-sa);
osm_sm_destroy(p_osm-sm);
--
1.5.4.5

-- 
Timothy A. Meier
Computer Scientist
ICCD/High Performance Computing
925.422.3341
[EMAIL PROTECTED]
From e0434e676d0b3dd63a323218d207f029da9e27a4 Mon Sep 17 00:00:00 2001
From: Tim Meier [EMAIL PROTECTED]
Date: Mon, 10 Nov 2008 09:48:55 -0800
Subject: [PATCH] opensm:  osm_opensm.c added a method to remove plugins

Upon shutdown, iterates through the plugins and releases
resources and removes them via their destroy() method.

Signed-off-by: Tim Meier [EMAIL PROTECTED]
---
 opensm/opensm/osm_opensm.c |   14 ++
 1 files changed, 14 insertions(+), 0 deletions(-)

diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
index 7deea6d..7286782 100644
--- a/opensm/opensm/osm_opensm.c
+++ b/opensm/opensm/osm_opensm.c
@@ -238,6 +238,19 @@ static void destroy_routing_engines(osm_opensm_t *osm)
}
 }
 
+/**
+ **/
+static void destroy_plugins(osm_opensm_t *osm)
+{
+   osm_epi_plugin_t *p;
+   // remove from the list, and destroy it
+   while (!cl_is_qlist_empty(osm-plugin_list)){
+   p = (osm_epi_plugin_t *)cl_qlist_remove_head(osm-plugin_list);
+   // plugin is responsible for freeing its own resources
+   osm_epi_destroy(p);
+   }
+}
+
 void osm_opensm_destroy(IN osm_opensm_t * const p_osm)
 {
/* in case of shutdown through exit proc - no ^C */
@@ -275,6 +288,7 @@ void osm_opensm_destroy(IN osm_opensm_t * const p_osm)
osm_sa_db_file_dump(p_osm);
 
/* do the destruction in reverse order as init */
+   destroy_plugins(p_osm);
destroy_routing_engines(p_osm);
osm_sa_destroy(p_osm-sa);
osm_sm_destroy(p_osm-sm);
-- 
1.5.4.5

___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

[ofa-general] Re: [PATCH] opensm/osm_multicast.c: bug with joining/leaving mcast group

2008-11-10 Thread Sasha Khapyorsky
Hi Yevgeny,

On 16:36 Mon 10 Nov , Yevgeny Kliteynik wrote:
 
 I think there's a bug in the osm_mgrp_add/remove_port functions.
 If some mcast group member has JoinState 0x1 (full member),
 and then new join from the same port received with JoinState
 0x2 (non member), OpenSM will reduce number of full members
 of this group, which eventually might cause group deletion.

Right, isn't this how things should work? When a full member updates its
state to non member, the number of full members is reduced, and when the
last full member leaves, the MC group is deleted (o15-0.2-1.9).

Sasha

 Similar problem (only in logically opposite direction) happens
 when port tries to partially leave mcast group.
 
 This patch should fix it.
 
 Signed-off-by: Yevgeny Kliteynik [EMAIL PROTECTED]
 ---
  opensm/opensm/osm_multicast.c |   33 +++--
  1 files changed, 11 insertions(+), 22 deletions(-)
 
 diff --git a/opensm/opensm/osm_multicast.c b/opensm/opensm/osm_multicast.c
 index d62d585..350fd22 100644
 --- a/opensm/opensm/osm_multicast.c
 +++ b/opensm/opensm/osm_multicast.c
 @@ -172,17 +172,11 @@ osm_mcm_port_t *osm_mgrp_add_port(IN osm_subn_t *subn, 
 osm_log_t *log,
   p_mgrp-last_change_id++;
   }
 
 - if ((join_state ^ prev_join_state)  IB_JOIN_STATE_FULL) {
 - if (join_state  IB_JOIN_STATE_FULL) {
 - if (++p_mgrp-full_members == 1) {
 - mgrp_send_notice(subn, log, p_mgrp, 66);
 - p_mgrp-to_be_deleted = 0;
 - }
 - } else if (--p_mgrp-full_members == 0) {
 - mgrp_send_notice(subn, log, p_mgrp, 67);
 - if (!p_mgrp-well_known)
 - p_mgrp-to_be_deleted = 1;
 - }
 + if ((join_state  IB_JOIN_STATE_FULL) 
 + !(prev_join_state  IB_JOIN_STATE_FULL) 
 + (++p_mgrp-full_members == 1)) {
 + mgrp_send_notice(subn, log, p_mgrp, 66);
 + p_mgrp-to_be_deleted = 0;
   }
 
   return (p_mcm_port);
 @@ -224,17 +218,12 @@ int osm_mgrp_remove_port(osm_subn_t *subn, osm_log_t 
 *log, osm_mgrp_t *mgrp,
 
   /* no more full members so the group will be deleted after re-route
  but only if it is not a well known group */
 - if ((port_join_state ^ new_join_state)  IB_JOIN_STATE_FULL) {
 - if (port_join_state  IB_JOIN_STATE_FULL) {
 - if (--mgrp-full_members == 0) {
 - mgrp_send_notice(subn, log, mgrp, 67);
 - if (!mgrp-well_known)
 - mgrp-to_be_deleted = 1;
 - }
 - } else if (++mgrp-full_members == 1) {
 - mgrp_send_notice(subn, log, mgrp, 66);
 - mgrp-to_be_deleted = 0;
 - }
 + if ((port_join_state  IB_JOIN_STATE_FULL) 
 + !(new_join_state  IB_JOIN_STATE_FULL) 
 + (--mgrp-full_members == 0)) {
 + mgrp_send_notice(subn, log, mgrp, 67);
 + if (!mgrp-well_known)
 + mgrp-to_be_deleted = 1;
   }
 
   return ret;
 -- 
 1.5.1.4
 
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] Re: [PATCH] opensm/osm_pkey.c: cosmetics in some log message

2008-11-10 Thread Sasha Khapyorsky
On 16:19 Mon 10 Nov , Yevgeny Kliteynik wrote:
 Hi Sasha,
 
 Just some cosmetics in a log message.
 
 Signed-off-by: Yevgeny Kliteynik [EMAIL PROTECTED]

Applied. Thanks.

Sasha
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] Re: [PATCH] opensm/ib_types.h: rename IB_MC_REC_STATE_SEND_ONLY_MEMBER

2008-11-10 Thread Sasha Khapyorsky
On 16:25 Mon 10 Nov , Yevgeny Kliteynik wrote:
 Sasha,
 
 The multicast Send Only bit is defined in the spec as SendOnlyNonMember,
 to denote that the port is not considered a member for purposes of group
 creation/deletion.
 
 Renaming IB_MC_REC_STATE_SEND_ONLY_MEMBER to 
 IB_MC_REC_STATE_SEND_ONLY_NON_MEMBER.
 
 Signed-off-by: Yevgeny Kliteynik [EMAIL PROTECTED]

Applied. Thanks.

Sasha
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] Re: [PATCH] opensm/osm_multicast.c: bug with joining/leaving mcast group

2008-11-10 Thread Yevgeny Kliteynik

Hi Sasha,

Sasha Khapyorsky wrote:

Hi Yevgeny,

On 16:36 Mon 10 Nov , Yevgeny Kliteynik wrote:

I think there's a bug in the osm_mgrp_add/remove_port functions.
If some mcast group member has JoinState 0x1 (full member),
and then new join from the same port received with JoinState
0x2 (non member), OpenSM will reduce number of full members
of this group, which eventually might cause group deletion.


Right, isn't this how things should work? When a full member updates its
state to non member, the number of full members is reduced, and when the
last full member leaves, the MC group is deleted (o15-0.2-1.9).


I thought so too, but turns out that it's wrong:

o15-0.1.11: If SA supports UD multicast, then if an endport joins a
multicast group as specified in o15-0.1.10:, SA shall replace the
endport’s current MCMemberRecord:JoinState component with the logical
OR of the MCMemberRecord:JoinState component with the endport’s current
MCMemberRecord:JoinState component if the endport had joined this
multicast group before.

So the full member doesn't update its state to non-member, but rather
adds additional bit to the JoinState (the non-member).

-- Yevgeny


Sasha


Similar problem (only in logically opposite direction) happens
when port tries to partially leave mcast group.

This patch should fix it.

Signed-off-by: Yevgeny Kliteynik [EMAIL PROTECTED]
---
 opensm/opensm/osm_multicast.c |   33 +++--
 1 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/opensm/opensm/osm_multicast.c b/opensm/opensm/osm_multicast.c
index d62d585..350fd22 100644
--- a/opensm/opensm/osm_multicast.c
+++ b/opensm/opensm/osm_multicast.c
@@ -172,17 +172,11 @@ osm_mcm_port_t *osm_mgrp_add_port(IN osm_subn_t *subn, 
osm_log_t *log,
p_mgrp-last_change_id++;
}

-   if ((join_state ^ prev_join_state)  IB_JOIN_STATE_FULL) {
-   if (join_state  IB_JOIN_STATE_FULL) {
-   if (++p_mgrp-full_members == 1) {
-   mgrp_send_notice(subn, log, p_mgrp, 66);
-   p_mgrp-to_be_deleted = 0;
-   }
-   } else if (--p_mgrp-full_members == 0) {
-   mgrp_send_notice(subn, log, p_mgrp, 67);
-   if (!p_mgrp-well_known)
-   p_mgrp-to_be_deleted = 1;
-   }
+   if ((join_state  IB_JOIN_STATE_FULL) 
+   !(prev_join_state  IB_JOIN_STATE_FULL) 
+   (++p_mgrp-full_members == 1)) {
+   mgrp_send_notice(subn, log, p_mgrp, 66);
+   p_mgrp-to_be_deleted = 0;
}

return (p_mcm_port);
@@ -224,17 +218,12 @@ int osm_mgrp_remove_port(osm_subn_t *subn, osm_log_t 
*log, osm_mgrp_t *mgrp,

/* no more full members so the group will be deleted after re-route
   but only if it is not a well known group */
-   if ((port_join_state ^ new_join_state)  IB_JOIN_STATE_FULL) {
-   if (port_join_state  IB_JOIN_STATE_FULL) {
-   if (--mgrp-full_members == 0) {
-   mgrp_send_notice(subn, log, mgrp, 67);
-   if (!mgrp-well_known)
-   mgrp-to_be_deleted = 1;
-   }
-   } else if (++mgrp-full_members == 1) {
-   mgrp_send_notice(subn, log, mgrp, 66);
-   mgrp-to_be_deleted = 0;
-   }
+   if ((port_join_state  IB_JOIN_STATE_FULL) 
+   !(new_join_state  IB_JOIN_STATE_FULL) 
+   (--mgrp-full_members == 0)) {
+   mgrp_send_notice(subn, log, mgrp, 67);
+   if (!mgrp-well_known)
+   mgrp-to_be_deleted = 1;
}

return ret;
--
1.5.1.4





___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] Re: [PATCH] opensm: osm_opensm.c added a method to remove plugins

2008-11-10 Thread Sasha Khapyorsky
On 10:26 Mon 10 Nov , Timothy A. Meier wrote:
 Sasha,
 
 During development, I am constantly bringing the SM up and down, so this 
 helps make sure things
 shut down gracefully.
 
 Should have no impact, if people are not using plugins... yet.
 
 From e0434e676d0b3dd63a323218d207f029da9e27a4 Mon Sep 17 00:00:00 2001
 From: Tim Meier [EMAIL PROTECTED]
 Date: Mon, 10 Nov 2008 09:48:55 -0800
 Subject: [PATCH] opensm:  osm_opensm.c added a method to remove plugins
 
 Upon shutdown, iterates through the plugins and releases
 resources and removes them via their destroy() method.
 
 Signed-off-by: Tim Meier [EMAIL PROTECTED]

Applied. Thanks.

Sasha
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


RE: [ofa-general] ib_mthca catastrophic error detected

2008-11-10 Thread Boris Shpolyansky

Scott,

Do you use any form of Boot-over-IB in this cluster?
If so - what version/flavor of it?

Thanks,
Boris Shpolyansky
Sr. Member of Technical Staff
Applications
Mellanox Technologies Inc.
2900 Stender Way
Santa Clara, CA 95054
Tel.: (408) 916 0014
Fax: (408) 970 3403
Cell: (408) 834 9365
www.mellanox.com

-Original Message-
From: [EMAIL PROTECTED]
[mailto:[EMAIL PROTECTED] On Behalf Of Scott A.
Friedman
Sent: Thursday, November 06, 2008 10:35 AM
To: Jack Morgenstein
Cc: Matthew Finlay; general@lists.openfabrics.org
Subject: Re: [ofa-general] ib_mthca catastrophic error detected

Hi

We have been working with Matthew Finlay [EMAIL PROTECTED] on this 
recently - you/we might pull all of this together. We are able to make 
any of our sdr cards have a catastrophic error - and are unable to do 
the same with our ddr cards. Matt has suggested that there is a firmware

fix possibly?

Anyway, to answer your questions:

The hosts are Sun X2200M, but we have swapped a few around with some 
hosts we have from Aspen systems and the problem remains. I suppose the 
similarity is that they are all nForce based.

The MPI used was the latest OpenMPI - I will find the version, but I do 
not think it matters whether we are using OpenMPI or MVAPICH.

The job itself does not seem to matter either. The situation is after a 
node comes up it takes a very long time for the card to become ACTIVE. 
It seems to oscillate between ACTIVE and INIT. We have waited several 
minutes sometimes but can never be sure of when it will settle down. The

queue certainly doesn't know and a job submitted to such a node will die

as the cards will have a catastrophic error.

Scott


  Console output from the following linux commands:
cat /etc/*rel*


Not a good idea...maybe this

#cat /etc/redhat-release
CentOS release 5 (Final)

cat /etc/lilo.conf , or:  cat /boot/grub/menu.lst (if you are using

grub)

# grub.conf generated by anaconda
#
# Note that you do not have to rerun grub after making changes to this
file
# NOTICE:  You have a /boot partition.  This means that
#  all kernel and initrd paths are relative to /boot/, eg.
#  root (hd0,0)
#  kernel /vmlinuz-version ro root=/dev/hda3
#  initrd /initrd-version.img
#boot=/dev/hda
default=0
timeout=5
splashimage=(hd0,0)/grub/splash.xpm.gz
hiddenmenu
title CentOS (2.6.18-92.1.6.el5)
  root (hd0,0)
  kernel /vmlinuz-2.6.18-92.1.6.el5 ro root=LABEL=/ rhgb quiet
  initrd /initrd-2.6.18-92.1.6.el5.img


uname -a

Linux n141 2.6.18-92.1.6.el5 #1 SMP Wed Jun 25 13:45:47 EDT 2008 x86_64 
x86_64 x86_64 GNU/Linux


cat /proc/cpuinfo
cat /proc/meminfo

processor : 0
vendor_id : AuthenticAMD
cpu family   : 16
model  : 2
model name   : Quad-Core AMD Opteron(tm) Processor 2354
stepping : 3
cpu MHz  : 2200.000
cache size   : 512 KB
physical id  : 0
siblings : 4
core id  : 0
cpu cores : 4
fpu  : yes
fpu_exception : yes
cpuid level  : 5
wp  : yes
flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt 
pdpe1gb rdtscp lm 3dnowext 3dnow constant_tsc pni cx16 popcnt lahf_lm 
cmp_legacy svm extapic cr8_legacy altmovcr8 abm sse4a misalignsse 
3dnowprefetch osvw
bogomips : 4424.75
TLB size : 1024 4K pages
clflush size : 64
cache_alignment : 64
address sizes : 48 bits physical, 48 bits virtual
power management: ts ttp tm stc 100mhzsteps hwpstate [8]

processor : 1
vendor_id : AuthenticAMD
cpu family   : 16
model  : 2
model name   : Quad-Core AMD Opteron(tm) Processor 2354
stepping : 3
cpu MHz  : 2200.000
cache size   : 512 KB
physical id  : 0
siblings : 4
core id  : 1
cpu cores : 4
fpu  : yes
fpu_exception : yes
cpuid level  : 5
wp  : yes
flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt 
pdpe1gb rdtscp lm 3dnowext 3dnow constant_tsc pni cx16 popcnt lahf_lm 
cmp_legacy svm extapic cr8_legacy altmovcr8 abm sse4a misalignsse 
3dnowprefetch osvw
bogomips : 4426.22
TLB size : 1024 4K pages
clflush size : 64
cache_alignment : 64
address sizes : 48 bits physical, 48 bits virtual
power management: ts ttp tm stc 100mhzsteps hwpstate [8]

processor : 2
vendor_id : AuthenticAMD
cpu family   : 16
model  : 2
model name   : Quad-Core AMD Opteron(tm) Processor 2354
stepping : 3
cpu MHz  : 2200.000
cache size   : 512 KB
physical id  : 0
siblings : 4
core id  : 2
cpu cores : 4
fpu  : yes
fpu_exception : yes
cpuid level  : 5
wp  : yes
flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt 
pdpe1gb rdtscp lm 3dnowext 3dnow constant_tsc pni cx16 popcnt lahf_lm 
cmp_legacy svm extapic cr8_legacy altmovcr8 abm sse4a misalignsse 
3dnowprefetch osvw
bogomips : 4421.37
TLB size : 1024 4K pages
clflush size : 64
cache_alignment : 64
address sizes : 48 bits physical, 48 bits virtual
power management: ts ttp tm stc 100mhzsteps 

***SPAM*** Re: [ofa-general] Re: [PATCH] opensm/osm_multicast.c: bug with joining/leaving mcast group

2008-11-10 Thread Hal Rosenstock
On Mon, Nov 10, 2008 at 2:18 PM, Yevgeny Kliteynik
[EMAIL PROTECTED] wrote:
 Hi Sasha,

 Sasha Khapyorsky wrote:

 Hi Yevgeny,

 On 16:36 Mon 10 Nov , Yevgeny Kliteynik wrote:

 I think there's a bug in the osm_mgrp_add/remove_port functions.
 If some mcast group member has JoinState 0x1 (full member),
 and then new join from the same port received with JoinState
 0x2 (non member), OpenSM will reduce number of full members
 of this group, which eventually might cause group deletion.

 Right, isn't this how things should work? When full member updates it
 state to non member the number of full members are reduced, and then
 last full member leaves the MC group is deleted (o15-0.2-1.9).

 I thought so too,

It's true; what you are seeing is the addition of send only non member
(to full member) and not eliminating full member.

but turns out that it's wrong:

 o15-0.1.11: If SA supports UD multicast, then if an endport joins a
 multicast group as specified in o15-0.1.10:, SA shall replace the
 endport's current MCMemberRecord:JoinState component with the logical
 OR of the MCMemberRecord:JoinState component with the endport's current
 MCMemberRecord:JoinState component if the endport had joined this
 multicast group before.

 So the full member doesn't update its state to non-member, but rather
 adds additional bit to the JoinState (the non-member).

Right, a port can simultaneously be full member, non member, and send
only non member.

-- Hal


 -- Yevgeny

 Sasha

 Similar problem (only in logically opposite direction) happens
 when port tries to partially leave mcast group.

 This patch should fix it.

 Signed-off-by: Yevgeny Kliteynik [EMAIL PROTECTED]
 ---
  opensm/opensm/osm_multicast.c |   33 +++--
  1 files changed, 11 insertions(+), 22 deletions(-)

 diff --git a/opensm/opensm/osm_multicast.c
 b/opensm/opensm/osm_multicast.c
 index d62d585..350fd22 100644
 --- a/opensm/opensm/osm_multicast.c
 +++ b/opensm/opensm/osm_multicast.c
 @@ -172,17 +172,11 @@ osm_mcm_port_t *osm_mgrp_add_port(IN osm_subn_t
 *subn, osm_log_t *log,
p_mgrp-last_change_id++;
}

 -   if ((join_state ^ prev_join_state)  IB_JOIN_STATE_FULL) {
 -   if (join_state  IB_JOIN_STATE_FULL) {
 -   if (++p_mgrp-full_members == 1) {
 -   mgrp_send_notice(subn, log, p_mgrp, 66);
 -   p_mgrp-to_be_deleted = 0;
 -   }
 -   } else if (--p_mgrp-full_members == 0) {
 -   mgrp_send_notice(subn, log, p_mgrp, 67);
 -   if (!p_mgrp-well_known)
 -   p_mgrp-to_be_deleted = 1;
 -   }
 +   if ((join_state  IB_JOIN_STATE_FULL) 
 +   !(prev_join_state  IB_JOIN_STATE_FULL) 
 +   (++p_mgrp-full_members == 1)) {
 +   mgrp_send_notice(subn, log, p_mgrp, 66);
 +   p_mgrp-to_be_deleted = 0;
}

return (p_mcm_port);
 @@ -224,17 +218,12 @@ int osm_mgrp_remove_port(osm_subn_t *subn,
 osm_log_t *log, osm_mgrp_t *mgrp,

/* no more full members so the group will be deleted after
 re-route
   but only if it is not a well known group */
 -   if ((port_join_state ^ new_join_state)  IB_JOIN_STATE_FULL) {
 -   if (port_join_state  IB_JOIN_STATE_FULL) {
 -   if (--mgrp-full_members == 0) {
 -   mgrp_send_notice(subn, log, mgrp, 67);
 -   if (!mgrp-well_known)
 -   mgrp-to_be_deleted = 1;
 -   }
 -   } else if (++mgrp-full_members == 1) {
 -   mgrp_send_notice(subn, log, mgrp, 66);
 -   mgrp-to_be_deleted = 0;
 -   }
 +   if ((port_join_state  IB_JOIN_STATE_FULL) 
 +   !(new_join_state  IB_JOIN_STATE_FULL) 
 +   (--mgrp-full_members == 0)) {
 +   mgrp_send_notice(subn, log, mgrp, 67);
 +   if (!mgrp-well_known)
 +   mgrp-to_be_deleted = 1;
}

return ret;
 --
 1.5.1.4



 ___
 general mailing list
 general@lists.openfabrics.org
 http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

 To unsubscribe, please visit
 http://openib.org/mailman/listinfo/openib-general

___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] Re: [PATCH] opensm/osm_multicast.c: bug with joining/leaving mcast group

2008-11-10 Thread Sasha Khapyorsky
On 21:18 Mon 10 Nov , Yevgeny Kliteynik wrote:
 Hi Sasha,

 Sasha Khapyorsky wrote:
 Hi Yevgeny,
 On 16:36 Mon 10 Nov , Yevgeny Kliteynik wrote:
 I think there's a bug in the osm_mgrp_add/remove_port functions.
 If some mcast group member has JoinState 0x1 (full member),
 and then new join from the same port received with JoinState
 0x2 (non member), OpenSM will reduce number of full members
 of this group, which eventually might cause group deletion.
 Right, isn't this how things should work? When full member updates it
 state to non member the number of full members are reduced, and then
 last full member leaves the MC group is deleted (o15-0.2-1.9).

 I thought so too, but turns out that it's wrong:

 o15-0.1.11: If SA supports UD multicast, then if an endport joins a
 multicast group as specified in o15-0.1.10:, SA shall replace the
 endport's current MCMemberRecord:JoinState component with the logical
 OR of the MCMemberRecord:JoinState component with the endport's current
 MCMemberRecord:JoinState component if the endport had joined this
 multicast group before.

 So the full member doesn't update its state to non-member, but rather
 adds additional bit to the JoinState (the non-member).

Ok. I see now.

Applied. Thanks.

Sasha
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


Re: [ofa-general] ib_mthca catastrophic error detected

2008-11-10 Thread Scott A. Friedman

Hi

No, no boot over IB - in fact there is no IPoIB configured on this 
cluster at all.


The firmware Matt sent seems to have fixed the problem as we have been 
unable to reproduce since we flashed some test nodes. We are in the 
process of flashing the remaining 100 or so nodes that have SDR cards as 
jobs finish.


Scott

Boris Shpolyansky wrote:

Scott,

Do you use any form of Boot-over-IB in this cluster?
If so - what version/flavor of it?

Thanks,
Boris Shpolyansky
Sr. Member of Technical Staff
Applications
Mellanox Technologies Inc.
2900 Stender Way
Santa Clara, CA 95054
Tel.: (408) 916 0014
Fax: (408) 970 3403
Cell: (408) 834 9365
www.mellanox.com

-Original Message-
From: [EMAIL PROTECTED]
[mailto:[EMAIL PROTECTED] On Behalf Of Scott A.
Friedman
Sent: Thursday, November 06, 2008 10:35 AM
To: Jack Morgenstein
Cc: Matthew Finlay; general@lists.openfabrics.org
Subject: Re: [ofa-general] ib_mthca catastrophic error detected

Hi

We have been working with Matthew Finlay [EMAIL PROTECTED] on this 
recently - you/we might pull all of this together. We are able to make 
any of our sdr cards have a catastrophic error - and are unable to do 
the same with our ddr cards. Matt has suggested that there is a firmware


fix possibly?

Anyway, to answer your questions:

The hosts are Sun X2200M, but we have swapped a few around with some 
hosts we have from Aspen systems and the problem remains. I suppose the 
similarity is that they are all nForce based.


The MPI used was the latest OpenMPI - I will find the version, but I do 
not think it matters whether we are using OpenMPI or MVAPICH.


The job itself does not seem to matter either. The situation is after a 
node comes up it takes a very long time for the card to become ACTIVE. 
It seems to oscillate between ACTIVE and INIT. We have waited several 
minutes sometimes but can never be sure of when it will settle down. The


queue certainly doesn't know and a job submitted to such a node will die

as the cards will have a catastrophic error.

Scott


  Console output from the following linux commands:
cat /etc/*rel*


Not a good idea...maybe this

#cat /etc/redhat-release
CentOS release 5 (Final)

cat /etc/lilo.conf , or:  cat /boot/grub/menu.lst (if you are using

grub)

# grub.conf generated by anaconda
#
# Note that you do not have to rerun grub after making changes to this
file
# NOTICE:  You have a /boot partition.  This means that
#  all kernel and initrd paths are relative to /boot/, eg.
#  root (hd0,0)
#  kernel /vmlinuz-version ro root=/dev/hda3
#  initrd /initrd-version.img
#boot=/dev/hda
default=0
timeout=5
splashimage=(hd0,0)/grub/splash.xpm.gz
hiddenmenu
title CentOS (2.6.18-92.1.6.el5)
  root (hd0,0)
  kernel /vmlinuz-2.6.18-92.1.6.el5 ro root=LABEL=/ rhgb quiet
  initrd /initrd-2.6.18-92.1.6.el5.img


uname -a

Linux n141 2.6.18-92.1.6.el5 #1 SMP Wed Jun 25 13:45:47 EDT 2008 x86_64 
x86_64 x86_64 GNU/Linux



cat /proc/cpuinfo
cat /proc/meminfo

processor : 0
vendor_id : AuthenticAMD
cpu family   : 16
model  : 2
model name   : Quad-Core AMD Opteron(tm) Processor 2354
stepping : 3
cpu MHz  : 2200.000
cache size   : 512 KB
physical id  : 0
siblings : 4
core id  : 0
cpu cores : 4
fpu  : yes
fpu_exception : yes
cpuid level  : 5
wp  : yes
flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt 
pdpe1gb rdtscp lm 3dnowext 3dnow constant_tsc pni cx16 popcnt lahf_lm 
cmp_legacy svm extapic cr8_legacy altmovcr8 abm sse4a misalignsse 
3dnowprefetch osvw

bogomips : 4424.75
TLB size : 1024 4K pages
clflush size : 64
cache_alignment : 64
address sizes : 48 bits physical, 48 bits virtual
power management: ts ttp tm stc 100mhzsteps hwpstate [8]

processor : 1
vendor_id : AuthenticAMD
cpu family   : 16
model  : 2
model name   : Quad-Core AMD Opteron(tm) Processor 2354
stepping : 3
cpu MHz  : 2200.000
cache size   : 512 KB
physical id  : 0
siblings : 4
core id  : 1
cpu cores : 4
fpu  : yes
fpu_exception : yes
cpuid level  : 5
wp  : yes
flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt 
pdpe1gb rdtscp lm 3dnowext 3dnow constant_tsc pni cx16 popcnt lahf_lm 
cmp_legacy svm extapic cr8_legacy altmovcr8 abm sse4a misalignsse 
3dnowprefetch osvw

bogomips : 4426.22
TLB size : 1024 4K pages
clflush size : 64
cache_alignment : 64
address sizes : 48 bits physical, 48 bits virtual
power management: ts ttp tm stc 100mhzsteps hwpstate [8]

processor : 2
vendor_id : AuthenticAMD
cpu family   : 16
model  : 2
model name   : Quad-Core AMD Opteron(tm) Processor 2354
stepping : 3
cpu MHz  : 2200.000
cache size   : 512 KB
physical id  : 0
siblings : 4
core id  : 2
cpu cores : 4
fpu  : yes
fpu_exception : yes
cpuid level  : 5
wp  : yes
flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov 
pat pse36 clflush mmx fxsr sse sse2 ht syscall 

[ofa-general] Re: [PATCH 1/2] fix default configuration files path

2008-11-10 Thread Sasha Khapyorsky
On 11:48 Thu 06 Nov , Doron Shoham wrote:
 fix default configuration files path in QoS_management_in_OpenSM.txt file
 from /usr/local/etc/opensm/ to /etc/opensm/
 
 Signed-off-by: Doron Shoham [EMAIL PROTECTED]
 ---
  opensm/doc/QoS_management_in_OpenSM.txt |6 +++---
  1 files changed, 3 insertions(+), 3 deletions(-)
 
 diff --git a/opensm/doc/QoS_management_in_OpenSM.txt 
 b/opensm/doc/QoS_management_in_OpenSM.txt
 index ba1b4b1..1a48b1a 100644
 --- a/opensm/doc/QoS_management_in_OpenSM.txt
 +++ b/opensm/doc/QoS_management_in_OpenSM.txt
 @@ -20,7 +20,7 @@
  
  When QoS in OpenSM is enabled (-Q or --qos), OpenSM looks for QoS Policy 
 file.
  The default name of OpenSM QoS policy file is
 -/usr/local/etc/opensm/qos-policy.conf. The default may be changed by using -Y
 +/etc/opensm/qos-policy.conf. The default may be changed by using -Y
  or --qos_policy_file option with OpenSM.

The OpenSM config dir is a configured value so it could be
/usr/local/etc/opensm or /etc/opensm or something else.

Basically I'm fine with using '/etc/opensm', but then it should be
updated to other docs too (specifically in
doc/performance-manager-HOWTO.txt).

Other way to handle this is to make *.in templates for those docs where
config path is used and generate the file in ./configure time (similar
to how it is done with OpenSM man page). Probably it is overkill for
docs...

Thoughts?

Sasha
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] Re: [PATCH] export osm_log_max in MB

2008-11-10 Thread Sasha Khapyorsky
On 13:59 Thu 06 Nov , Doron Shoham wrote:
 export the osm_log_max in MB when using 'opensm -c conf
 
 Signed-off-by: Doron Shoham [EMAIL PROTECTED]

Both applied. Thanks.

Sasha
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] Re: [PATCH] opensm/opensm/osm_state_mgr.c: Add check for valid physical port before using pointer.

2008-11-10 Thread Sasha Khapyorsky
On 09:57 Tue 04 Nov , Ira Weiny wrote:
 From 567c3893f24f4dc25ef5f4e74ef9deeb8ae541ad Mon Sep 17 00:00:00 2001
 From: Ira Weiny [EMAIL PROTECTED]
 Date: Mon, 3 Nov 2008 14:47:50 -0800
 Subject: [PATCH] opensm/opensm/osm_state_mgr.c: Add check for valid physical 
 port before using
  pointer.
 
There are times when PortInfo fails which leaves osm_node_t with invalid
osm_physp_t pointers.  In this case do not use an invalid pointer.
 
 Signed-off-by: Ira Weiny [EMAIL PROTECTED]

Applied. Thanks.

However some note is below.

 ---
  opensm/opensm/osm_state_mgr.c |6 ++
  1 files changed, 6 insertions(+), 0 deletions(-)
 
 diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c
 index ba3b6bf..841438c 100644
 --- a/opensm/opensm/osm_state_mgr.c
 +++ b/opensm/opensm/osm_state_mgr.c
 @@ -542,6 +542,12 @@ static void __osm_state_mgr_get_node_desc(IN 
 cl_map_item_t * const p_object,
  
   /* get a physp to request from. */
   p_physp = osm_node_get_any_physp_ptr(p_node);
 + if (!osm_physp_is_valid(p_physp)) {
 + OSM_LOG(sm-p_log, OSM_LOG_ERROR,
 + __osm_state_mgr_get_node_desc: ERR 331C: 
 + Failed to get valid physical port object\n);
 + goto exit;
 + }

Actually it can be a valid case. For example when node was first time
discovered via port A, when this port was disconnected and the same node
was discovered via port B - it is not a new node and node_info (where
port number for osm_node_get_any_physp_ptr() is stored) will not be
updated.

Obviously the patch is fine. But probably we need more general fix, for
example to redo osm_node_get_any_physp_ptr() so that it will not return
invalid ports. Need to review other osm_node_get_any_physp_ptr() usages.

Sasha
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] Re: [PATCH] IB/ehca: Fix suppression of port activation events

2008-11-10 Thread Roland Dreier
  A previous fix introduced a regression where port activation events were
  dropped unconditionally if port autodetection was not enabled. Fixed.

Is this a fix to IB/ehca: Remove reference to special QP in case of
port activation failure?  Because if so I can roll it into that patch,
since Linus hasn't pulled it yet.

 - R.
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


RE: [ofa-general] ib_mthca catastrophic error detected

2008-11-10 Thread Boris Shpolyansky
OK, great!

Please, update us as soon as you have the entire cluster upgraded to the
new FW and have run more tests on it.

Thanks,
Boris Shpolyansky
Sr. Member of Technical Staff
Applications
Mellanox Technologies Inc.
2900 Stender Way
Santa Clara, CA 95054
Tel.: (408) 916 0014
Fax: (408) 970 3403
Cell: (408) 834 9365
www.mellanox.com

-Original Message-
From: Scott A. Friedman [mailto:[EMAIL PROTECTED] 
Sent: Monday, November 10, 2008 11:45 AM
To: Boris Shpolyansky
Cc: Jack Morgenstein; Matthew Finlay; general@lists.openfabrics.org
Subject: Re: [ofa-general] ib_mthca catastrophic error detected

Hi

No, no boot over IB - in fact there is no IPoIB configured on this 
cluster at all.

The firmware Matt sent seems to have fixed the problem as we have been 
unable to reproduce since we flashed some test nodes. We are in the 
process of flashing the remaining 100 or so nodes that have SDR cards as

jobs finish.

Scott

Boris Shpolyansky wrote:
 Scott,
 
 Do you use any form of Boot-over-IB in this cluster?
 If so - what version/flavor of it?
 
 Thanks,
 Boris Shpolyansky
 Sr. Member of Technical Staff
 Applications
 Mellanox Technologies Inc.
 2900 Stender Way
 Santa Clara, CA 95054
 Tel.: (408) 916 0014
 Fax: (408) 970 3403
 Cell: (408) 834 9365
 www.mellanox.com
 
 -Original Message-
 From: [EMAIL PROTECTED]
 [mailto:[EMAIL PROTECTED] On Behalf Of Scott A.
 Friedman
 Sent: Thursday, November 06, 2008 10:35 AM
 To: Jack Morgenstein
 Cc: Matthew Finlay; general@lists.openfabrics.org
 Subject: Re: [ofa-general] ib_mthca catastrophic error detected
 
 Hi
 
 We have been working with Matthew Finlay [EMAIL PROTECTED] on this 
 recently - you/we might pull all of this together. We are able to make

 any of our sdr cards have a catastrophic error - and are unable to do 
 the same with our ddr cards. Matt has suggested that there is a
firmware
 
 fix possibly?
 
 Anyway, to answer your questions:
 
 The hosts are Sun X2200M, but we have swapped a few around with some 
 hosts we have from Aspen systems and the problem remains. I suppose
the 
 similarity is that they are all nForce based.
 
 The MPI used was the latest OpenMPI - I will find the version, but I
do 
 not think it matters whether we are using OpenMPI or MVAPICH.
 
 The job itself does not seem to matter either. The situation is after
a 
 node comes up it takes a very long time for the card to become ACTIVE.

 It seems to oscillate between ACTIVE and INIT. We have waited several 
 minutes sometimes but can never be sure of when it will settle down.
The
 
 queue certainly doesn't know and a job submitted to such a node will
die
 
 as the cards will have a catastrophic error.
 
 Scott
 
 
   Console output from the following linux commands:
 cat /etc/*rel*
 
 
 Not a good idea...maybe this
 
 #cat /etc/redhat-release
 CentOS release 5 (Final)
 
 cat /etc/lilo.conf , or:  cat /boot/grub/menu.lst (if you are
using
 
 grub)
 
 # grub.conf generated by anaconda
 #
 # Note that you do not have to rerun grub after making changes to this
 file
 # NOTICE:  You have a /boot partition.  This means that
 #  all kernel and initrd paths are relative to /boot/, eg.
 #  root (hd0,0)
 #  kernel /vmlinuz-version ro root=/dev/hda3
 #  initrd /initrd-version.img
 #boot=/dev/hda
 default=0
 timeout=5
 splashimage=(hd0,0)/grub/splash.xpm.gz
 hiddenmenu
 title CentOS (2.6.18-92.1.6.el5)
   root (hd0,0)
   kernel /vmlinuz-2.6.18-92.1.6.el5 ro root=LABEL=/ rhgb quiet
   initrd /initrd-2.6.18-92.1.6.el5.img
 
 
 uname -a
 
 Linux n141 2.6.18-92.1.6.el5 #1 SMP Wed Jun 25 13:45:47 EDT 2008
x86_64 
 x86_64 x86_64 GNU/Linux
 
 
 cat /proc/cpuinfo
 cat /proc/meminfo
 
 processor : 0
 vendor_id : AuthenticAMD
 cpu family   : 16
 model  : 2
 model name   : Quad-Core AMD Opteron(tm) Processor 2354
 stepping : 3
 cpu MHz  : 2200.000
 cache size   : 512 KB
 physical id  : 0
 siblings : 4
 core id  : 0
 cpu cores : 4
 fpu  : yes
 fpu_exception : yes
 cpuid level  : 5
 wp  : yes
 flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov

 pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt 
 pdpe1gb rdtscp lm 3dnowext 3dnow constant_tsc pni cx16 popcnt lahf_lm 
 cmp_legacy svm extapic cr8_legacy altmovcr8 abm sse4a misalignsse 
 3dnowprefetch osvw
 bogomips : 4424.75
 TLB size : 1024 4K pages
 clflush size : 64
 cache_alignment : 64
 address sizes : 48 bits physical, 48 bits virtual
 power management: ts ttp tm stc 100mhzsteps hwpstate [8]
 
 processor : 1
 vendor_id : AuthenticAMD
 cpu family   : 16
 model  : 2
 model name   : Quad-Core AMD Opteron(tm) Processor 2354
 stepping : 3
 cpu MHz  : 2200.000
 cache size   : 512 KB
 physical id  : 0
 siblings : 4
 core id  : 1
 cpu cores : 4
 fpu  : yes
 fpu_exception : yes
 cpuid level  : 5
 wp  : yes
 flags  : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov

 pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt 
 pdpe1gb rdtscp 

[ofa-general] Re: [opensm patch][2/2] verify config inputs when config file is rescanned

2008-11-10 Thread Sasha Khapyorsky
Hi Al,

On 15:01 Thu 30 Oct , Al Chu wrote:
 Hey Sasha,
 
 I noticed that after the config file is rescanned, the new potential
 inputs aren't checked for validity.  Patch is attached.
 
 Al
 
 -- 
 Albert Chu
 [EMAIL PROTECTED]
 Computer Scientist
 High Performance Systems Division
 Lawrence Livermore National Laboratory

 From edfcd2de96c3525d1609b4c0f03c17ecc0495c18 Mon Sep 17 00:00:00 2001
 From: root [EMAIL PROTECTED](none)
 Date: Thu, 30 Oct 2008 13:58:55 -0700
 Subject: [PATCH] verify rescanned config input
 
 
 Signed-off-by: root [EMAIL PROTECTED](none)
 

I'm fine with this patch, but could you fix S-O-B line? Thanks.

Sasha
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


Re: [ofa-general] Re: [opensm patch] support dump_conf command in opensm console

2008-11-10 Thread Al Chu
Hey Sasha,

Attached is the re-worked patch.  Assumes changes from my fix qos
config parsing bugs patch are accepted.

Al

On Mon, 2008-11-10 at 09:42 -0800, Al Chu wrote:
 Hey Sasha,
 
 On Sun, 2008-11-09 at 19:25 +0200, Sasha Khapyorsky wrote:
  Hi Al,
  
  On 16:39 Mon 03 Nov , Al Chu wrote:
   Hey Sasha,
   
   When config files are rescanned and loaded, there's no way to know if
   the right configuration was actually reloaded or not.  A console command
   to dump the current config is a useful way to verify the loading of new
   configs or not.
   
   This patch assumes the fixes from my fix qos config parsing bugs is
   accepted.
  
  Didn't pass over it, sorry about delay.
  
   
   Al
   
   -- 
   Albert Chu
   [EMAIL PROTECTED]
   Computer Scientist
   High Performance Systems Division
   Lawrence Livermore National Laboratory
  
   From 249607e47ec7ef1b92f9578cece90460418d12b8 Mon Sep 17 00:00:00 2001
   From: Albert Chu [EMAIL PROTECTED]
   Date: Mon, 3 Nov 2008 16:22:29 -0800
   Subject: [PATCH] support dump_conf console command
   
   
   Signed-off-by: Albert Chu [EMAIL PROTECTED]
   ---
opensm/opensm/osm_console.c |  158 
   +++
1 files changed, 158 insertions(+), 0 deletions(-)
   
   diff --git a/opensm/opensm/osm_console.c b/opensm/opensm/osm_console.c
   index d9bbbc2..8422655 100644
   --- a/opensm/opensm/osm_console.c
   +++ b/opensm/opensm/osm_console.c
   @@ -53,6 +53,10 @@
#include complib/cl_passivelock.h
#include opensm/osm_perfmgr.h

   +#define NULL_STR (null)
   +
   +#define BOOLEAN_STR(__b) ((__b) ? TRUE : FALSE)
   +
struct command {
 char *name;
 void (*help_function) (FILE * out, int detail);
   @@ -189,6 +193,14 @@ static void help_lidbalance(FILE * out, int detail)
 }
}

   +static void help_dump_conf(FILE *out, int detail)
   +{
   + fprintf(out, dump_conf\n);
   + if (detail) {
   + fprintf(out, dump current opensm configuration\n);
   + }
   +}
   +
#ifdef ENABLE_OSM_PERF_MGR
static void help_perfmgr(FILE * out, int detail)
{
   @@ -1136,6 +1148,151 @@ static void perfmgr_parse(char **p_last, 
   osm_opensm_t * p_osm, FILE * out)
}
#endif   /* ENABLE_OSM_PERF_MGR */

   +static void dump_qos_options(osm_qos_options_t * opt,
   +  osm_qos_options_t * dflt, 
   +  char *prefix,
   +  FILE * out)
   +{
   + fprintf(out, %s_max_vls : %u\n,
   + prefix, opt-max_vls ? opt-max_vls : dflt-max_vls);
   + fprintf(out, %s_high_limit : %u\n,
   + prefix, opt-high_limit = 0 ? (unsigned)opt-high_limit : 
   (unsigned)dflt-high_limit);
   + fprintf(out, %s_vlarb_high : %s\n,
   + prefix, opt-vlarb_high ? opt-vlarb_high : dflt-vlarb_high);
   + fprintf(out, %s_vlarb_low : %s\n,
   + prefix, opt-vlarb_low ? opt-vlarb_low : dflt-vlarb_low);
   + fprintf(out, %s_sl2vl : %s\n,
   + prefix, opt-sl2vl ? opt-sl2vl : dflt-sl2vl);
   +}
   +
   +static void dump_conf_parse(char **p_last, osm_opensm_t * p_osm, FILE * 
   out)
   +{
  
  Why to not use osm_subn_write_conf_file() function (wrapped by
  dump_conf_parse())? I think we need to have config dumping code
  consolidated.
 
 I had thought of that, but I didn't want all of the instructions and all
 the extra lines of output.  But I guess it's not that big of a deal in
 the end.  I'll send a new patch.
 
 Al
 
  Sasha
  
   + osm_subn_opt_t * opt = p_osm-subn.opt;
   +
   + fprintf(out, config_file : %s\n, 
   + opt-config_file ? opt-config_file : NULL_STR);
   + fprintf(out, guid : 0x%016 PRIx64 \n, opt-guid);
   + fprintf(out, m_key : 0x%016 PRIx64 \n, opt-m_key);
   + fprintf(out, sm_key : 0x%016 PRIx64 \n, opt-sm_key);
   + fprintf(out, sa_key : 0x%016 PRIx64 \n, opt-sa_key);
   + fprintf(out, subnet_prefix : 0x%016 PRIx64 \n, opt-subnet_prefix);
   + fprintf(out, m_key_lease_period : %u\n, opt-m_key_lease_period);
   + fprintf(out, sweep_interval : %u\n, opt-sweep_interval);
   + fprintf(out, max_wire_smps : %u\n, opt-max_wire_smps);
   + fprintf(out, transaction_timeout : %u\n, opt-transaction_timeout);
   + fprintf(out, sm_priority : %u\n, opt-sm_priority);
   + fprintf(out, lmc : %u\n, opt-lmc);
   + fprintf(out, lmc_esp0 : %s\n, 
   + BOOLEAN_STR(opt-lmc_esp0));
   + fprintf(out, max_op_vls : %u\n, opt-max_op_vls);
   + fprintf(out, force_link_speed : %u\n, opt-force_link_speed);
   + fprintf(out, reassign_lids : %s\n, 
   + BOOLEAN_STR(opt-reassign_lids));
   + fprintf(out, ignore_other_sm : %s\n, 
   + BOOLEAN_STR(opt-ignore_other_sm));
   + fprintf(out, single_thread : %s\n, 
   + BOOLEAN_STR(opt-single_thread));
   + fprintf(out, disable_multicast : %s\n, 
   + BOOLEAN_STR(opt-disable_multicast));
   + fprintf(out, force_log_flush : %s\n, 
   + BOOLEAN_STR(opt-force_log_flush));
   + fprintf(out, subnet_timeout : %u\n, 

[ofa-general] Re: [PATCH] opensm/opensm/osm_state_mgr.c: Add check for valid physical port before using pointer.

2008-11-10 Thread Ira Weiny
On Mon, 10 Nov 2008 22:13:33 +0200
Sasha Khapyorsky [EMAIL PROTECTED] wrote:

 On 09:57 Tue 04 Nov , Ira Weiny wrote:
  From 567c3893f24f4dc25ef5f4e74ef9deeb8ae541ad Mon Sep 17 00:00:00 2001
  From: Ira Weiny [EMAIL PROTECTED]
  Date: Mon, 3 Nov 2008 14:47:50 -0800
  Subject: [PATCH] opensm/opensm/osm_state_mgr.c: Add check for valid 
  physical port before using
   pointer.
  
 There are times when PortInfo fails which leaves osm_node_t with invalid
 osm_physp_t pointers.  In this case do not use an invalid pointer.
  
  Signed-off-by: Ira Weiny [EMAIL PROTECTED]
 
 Applied. Thanks.
 
 However some note is below.
 
  ---
   opensm/opensm/osm_state_mgr.c |6 ++
   1 files changed, 6 insertions(+), 0 deletions(-)
  
  diff --git a/opensm/opensm/osm_state_mgr.c b/opensm/opensm/osm_state_mgr.c
  index ba3b6bf..841438c 100644
  --- a/opensm/opensm/osm_state_mgr.c
  +++ b/opensm/opensm/osm_state_mgr.c
  @@ -542,6 +542,12 @@ static void __osm_state_mgr_get_node_desc(IN 
  cl_map_item_t * const p_object,
   
  /* get a physp to request from. */
  p_physp = osm_node_get_any_physp_ptr(p_node);
  +   if (!osm_physp_is_valid(p_physp)) {
  +   OSM_LOG(sm-p_log, OSM_LOG_ERROR,
  +   __osm_state_mgr_get_node_desc: ERR 331C: 
  +   Failed to get valid physical port object\n);
  +   goto exit;
  +   }
 
 Actually it can be a valid case. For example when node was first time
 discovered via port A, when this port was disconnected and the same node
 was discovered via port B - it is not a new node and node_info (where
 port number for osm_node_get_any_physp_ptr() is stored) will not be
 updated.

Ah, good point, I just happened to see it when PortInfo failed.

 
 Obviously the patch is fine. But probably we need more general fix, for
 example to redo osm_node_get_any_physp_ptr() so that it will not return
 invalid ports. Need to review other osm_node_get_any_physp_ptr() usages.

I was wondering if it would return invalid ports ever.  It would be easy for it
to return only valid ports but perhaps that should be another function to
preserve functionality?

Ira

___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] Re: [opensm patch][2/2] verify config inputs when config file is rescanned

2008-11-10 Thread Al Chu
On Mon, 2008-11-10 at 23:02 +0200, Sasha Khapyorsky wrote:
 Hi Al,
 
 On 15:01 Thu 30 Oct , Al Chu wrote:
  Hey Sasha,
  
  I noticed that after the config file is rescanned, the new potential
  inputs aren't checked for validity.  Patch is attached.
  
  Al
  
  -- 
  Albert Chu
  [EMAIL PROTECTED]
  Computer Scientist
  High Performance Systems Division
  Lawrence Livermore National Laboratory
 
  From edfcd2de96c3525d1609b4c0f03c17ecc0495c18 Mon Sep 17 00:00:00 2001
  From: root [EMAIL PROTECTED](none)
  Date: Thu, 30 Oct 2008 13:58:55 -0700
  Subject: [PATCH] verify rescanned config input
  
  
  Signed-off-by: root [EMAIL PROTECTED](none)
  
 
 I'm fine with this patch, but could you fix S-O-B line? Thanks.

Oops.  New one is attached (I'll repost the [1/2] patch too).

Al

 Sasha
-- 
Albert Chu
[EMAIL PROTECTED]
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
From edfcd2de96c3525d1609b4c0f03c17ecc0495c18 Mon Sep 17 00:00:00 2001
From: root [EMAIL PROTECTED](none)
Date: Thu, 30 Oct 2008 13:58:55 -0700
Subject: [PATCH] verify rescanned config input


Signed-off-by: Albert Chu [EMAIL PROTECTED]
---
 opensm/opensm/osm_subnet.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
index ab2ff9c..5cf9c33 100644
--- a/opensm/opensm/osm_subnet.c
+++ b/opensm/opensm/osm_subnet.c
@@ -71,6 +71,8 @@
 
 static const char null_str[] = (null);
 
+static void subn_verify_conf_file(IN osm_subn_opt_t * const p_opts);
+
 /**
  **/
 void osm_subn_construct(IN osm_subn_t * const p_subn)
@@ -852,6 +854,8 @@ int osm_subn_rescan_conf_files(IN osm_subn_t * const p_subn)
 	}
 	fclose(opts_file);
 
+	subn_verify_conf_file(&p_subn->opt);
+
 	osm_parse_prefix_routes_file(p_subn);
 
 	return 0;
-- 
1.5.4.5

___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [ofa-general] [opensm patch][1/2] fix qos config parsing bugs

2008-11-10 Thread Al Chu
Hey Sasha,

New patch w/ proper signed off by line.

Al

On Thu, 2008-10-30 at 15:01 -0700, Al Chu wrote:
 Hey Sasha,
 
 I found a bunch of qos config parsing issues, listed below:
 
 1)
 
 If the user sets the qos default fields (e.g. qos_high_limit,
 qos_vlarb_high, etc.), but does not have the qos_ca, qos_swe, qos_rtr,
 etc. equivalent fields listed (e.g. qos_ca_high_limit,
 qos_sw0_vlarb_high), the values set in the qos default fields are not
 loaded into the CAs, switches, etc.  The reason is that in qos_build_config()
 we load defaults like this:
 
 p = opt->vlarb_high ? opt->vlarb_high : dflt->vlarb_high;
 
 but we always set the fields to something non-NULL.
 
 static void subn_set_default_qos_options(IN osm_qos_options_t * opt)
 {
 opt->max_vls = OSM_DEFAULT_QOS_MAX_VLS;
 opt->high_limit = OSM_DEFAULT_QOS_HIGH_LIMIT;
 opt->vlarb_high = OSM_DEFAULT_QOS_VLARB_HIGH;
 opt->vlarb_low = OSM_DEFAULT_QOS_VLARB_LOW;
 opt->sl2vl = OSM_DEFAULT_QOS_SL2VL;
 }
 
 2)
 
 In qos_build_config() we load the high_limit like this:
 
 cfg->vl_high_limit = (uint8_t) opt->high_limit;
 
 So there is no way to tell the qos_ca, qos_swe, qos_rtr, etc. high_limit
 options to go back to the default high_limit.  It just assumes that
 whatever is input (or was set by default) is what you should use.
 
 3)
 
 Some fields like qos_vlarb_high are assumed to be correctly set and can
 segfault opensm.
 
 The attached patch fixes these up.  Obviously there's tons of ways to
 do this.  I decided to ...
 
 A) only initialize qos_options to the real defaults
 
 B) init all qos_*_options to sentinel values (-1, NULL, etc.) to
 indicate it should use the configured defaults if they aren't set by the
 user.  The high_limit was changed from an unsigned to an int b/c 0 is a
 valid high_limit value.
 
 C) verify that the default qos inputs are definitely correct (i.e. can't
 be NULL).  Reset to hard coded defaults if need be.
 
 D) load the default vs. non-default appropriately in QoS.
 
 Al
 
 P.S.  This patch does not rely on my previous remove qos_max_vls
 config patch.  I assume we're keeping the max_vls fields in this patch.
 
 ___
 general mailing list
 general@lists.openfabrics.org
 http:// lists.openfabrics.org/cgi-bin/mailman/listinfo/general
 
 To unsubscribe, please visit http:// 
 openib.org/mailman/listinfo/openib-general
-- 
Albert Chu
[EMAIL PROTECTED]
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
From 00a15a1797b79fd5e3298d98742b6da3613fb9c3 Mon Sep 17 00:00:00 2001
From: root [EMAIL PROTECTED](none)
Date: Thu, 30 Oct 2008 09:32:29 -0700
Subject: [PATCH] fix qos config parsing bugs


Signed-off-by: Albert Chu [EMAIL PROTECTED]
---
 opensm/include/opensm/osm_subnet.h |   12 +-
 opensm/opensm/osm_qos.c|6 +-
 opensm/opensm/osm_subnet.c |  467 ++--
 3 files changed, 293 insertions(+), 192 deletions(-)

diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h
index 7259587..11063b7 100644
--- a/opensm/include/opensm/osm_subnet.h
+++ b/opensm/include/opensm/osm_subnet.h
@@ -99,7 +99,7 @@ struct osm_qos_policy;
 */
 typedef struct osm_qos_options {
 	unsigned max_vls;
-	unsigned high_limit;
+	int high_limit;
 	char *vlarb_high;
 	char *vlarb_low;
 	char *sl2vl;
@@ -108,20 +108,20 @@ typedef struct osm_qos_options {
 * FIELDS
 *
 *	max_vls
-*		The number of maximum VLs on the Subnet
+*		The number of maximum VLs on the Subnet (0 == use default)
 *
 *	high_limit
 *		The limit of High Priority component of VL Arbitration
-*		table (IBA 7.6.9)
+*		table (IBA 7.6.9) (-1 == use default)
 *
 *	vlarb_high
-*		High priority VL Arbitration table template.
+*		High priority VL Arbitration table template. (NULL == use default)
 *
 *	vlarb_low
-*		Low priority VL Arbitration table template.
+*		Low priority VL Arbitration table template. (NULL == use default)
 *
 *	sl2vl
-*		SL2VL Mapping table (IBA 7.6.6) template.
+*		SL2VL Mapping table (IBA 7.6.6) template. (NULL == use default)
 *
 */
 
diff --git a/opensm/opensm/osm_qos.c b/opensm/opensm/osm_qos.c
index 1679ae0..b451c25 100644
--- a/opensm/opensm/osm_qos.c
+++ b/opensm/opensm/osm_qos.c
@@ -382,7 +382,11 @@ static void qos_build_config(struct qos_config *cfg,
 	memset(cfg, 0, sizeof(*cfg));
 
 	cfg->max_vls = opt->max_vls > 0 ? opt->max_vls : dflt->max_vls;
-	cfg->vl_high_limit = (uint8_t) opt->high_limit;
+
+	if (opt->high_limit >= 0)
+		cfg->vl_high_limit = (uint8_t) opt->high_limit;
+	else
+		cfg->vl_high_limit = (uint8_t) dflt->high_limit;
 
 	p = opt->vlarb_high ? opt->vlarb_high : dflt->vlarb_high;
 	for (i = 0; i < 2 * IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK; i++) {
diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
index 0422d0f..ab2ff9c 100644
--- a/opensm/opensm/osm_subnet.c
+++ b/opensm/opensm/osm_subnet.c
@@ -370,6 +370,15 @@ static void subn_set_default_qos_options(IN 

Re: [ofa-general] [opensm patch][1/2] fix qos config parsing bugs

2008-11-10 Thread Al Chu
On Mon, 2008-11-10 at 13:16 -0800, Al Chu wrote:
 Hey Sasha,
 
 New patch w/ proper signed off by line.

Argh.  Repost, w/ right Author.  Sorry.

Al

 Al
 
 On Thu, 2008-10-30 at 15:01 -0700, Al Chu wrote:
  Hey Sasha,
  
  I found a bunch of qos config parsing issues, listed below:
  
  1)
  
  If the user sets the qos default fields (e.g. qos_high_limit,
  qos_vlarb_high, etc.), but does not have the qos_ca, qos_swe, qos_rtr,
  etc. equivalent fields listed (e.g. qos_ca_high_limit,
  qos_sw0_vlarb_high), the values set in the qos default fields are not
  loaded into the CAs, switches, etc.  The reason is that in qos_build_config()
  we load defaults like this:
  
  p = opt-vlarb_high ? opt-vlarb_high : dflt-vlarb_high;
  
  but we always set the fields to something non-NULL.
  
  static void subn_set_default_qos_options(IN osm_qos_options_t * opt)
  {
  opt-max_vls = OSM_DEFAULT_QOS_MAX_VLS;
  opt-high_limit = OSM_DEFAULT_QOS_HIGH_LIMIT;
  opt-vlarb_high = OSM_DEFAULT_QOS_VLARB_HIGH;
  opt-vlarb_low = OSM_DEFAULT_QOS_VLARB_LOW;
  opt-sl2vl = OSM_DEFAULT_QOS_SL2VL;
  }
  
  2)
  
  In qos_build_config() we load the high_limit like this:
  
  cfg-vl_high_limit = (uint8_t) opt-high_limit;
  
  So there is no way to tell the qos_ca, qos_swe, qos_rtr, etc. high_limit
  options to go back to the default high_limit.  It just assumes that
  whatever is input (or was set by default) is what you should use.
  
  3)
  
  Some fields like qos_vlarb_high are assumed to be correctly set and can
  segfault opensm.
  
  The attached patch fixes these up.  Obviously there's tons of ways to
  do this.  I decided to ...
  
  A) only initialize qos_options to the real defaults
  
  B) init all qos_*_options to sentinel values (-1, NULL, etc.) to
  indicate it should use the configured defaults if they aren't set by the
  user.  The high_limit was changed from an unsigned to an int b/c 0 is a
  valid high_limit value.
  
  C) verify that the default qos inputs are definitely correct (i.e. can't
  be NULL).  Reset to hard coded defaults if need be.
  
  D) load the default vs. non-default appropriately in QoS.
  
  Al
  
  P.S.  This patch does not rely on my previous remove qos_max_vls
  config patch.  I assume we're keeping the max_vls fields in this patch.
  
  ___
  general mailing list
  general@lists.openfabrics.org
  http://  lists.openfabrics.org/cgi-bin/mailman/listinfo/general
  
  To unsubscribe, please visit http://  
  openib.org/mailman/listinfo/openib-general
 ___
 general mailing list
 general@lists.openfabrics.org
 http:// lists.openfabrics.org/cgi-bin/mailman/listinfo/general
 
 To unsubscribe, please visit http:// 
 openib.org/mailman/listinfo/openib-general
-- 
Albert Chu
[EMAIL PROTECTED]
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
From eb6d045bab61f77ef04abcd0b73fc712b53aa8aa Mon Sep 17 00:00:00 2001
From: Albert Chu [EMAIL PROTECTED]
Date: Mon, 10 Nov 2008 13:10:13 -0800
Subject: [PATCH] fix qos config parsing bugs


Signed-off-by: Albert Chu [EMAIL PROTECTED]
---
 opensm/include/opensm/osm_subnet.h |   12 +-
 opensm/opensm/osm_qos.c|6 +-
 opensm/opensm/osm_subnet.c |  467 ++--
 3 files changed, 293 insertions(+), 192 deletions(-)

diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h
index 7259587..11063b7 100644
--- a/opensm/include/opensm/osm_subnet.h
+++ b/opensm/include/opensm/osm_subnet.h
@@ -99,7 +99,7 @@ struct osm_qos_policy;
 */
 typedef struct osm_qos_options {
 	unsigned max_vls;
-	unsigned high_limit;
+	int high_limit;
 	char *vlarb_high;
 	char *vlarb_low;
 	char *sl2vl;
@@ -108,20 +108,20 @@ typedef struct osm_qos_options {
 * FIELDS
 *
 *	max_vls
-*		The number of maximum VLs on the Subnet
+*		The number of maximum VLs on the Subnet (0 == use default)
 *
 *	high_limit
 *		The limit of High Priority component of VL Arbitration
-*		table (IBA 7.6.9)
+*		table (IBA 7.6.9) (-1 == use default)
 *
 *	vlarb_high
-*		High priority VL Arbitration table template.
+*		High priority VL Arbitration table template. (NULL == use default)
 *
 *	vlarb_low
-*		Low priority VL Arbitration table template.
+*		Low priority VL Arbitration table template. (NULL == use default)
 *
 *	sl2vl
-*		SL2VL Mapping table (IBA 7.6.6) template.
+*		SL2VL Mapping table (IBA 7.6.6) template. (NULL == use default)
 *
 */
 
diff --git a/opensm/opensm/osm_qos.c b/opensm/opensm/osm_qos.c
index 1679ae0..b451c25 100644
--- a/opensm/opensm/osm_qos.c
+++ b/opensm/opensm/osm_qos.c
@@ -382,7 +382,11 @@ static void qos_build_config(struct qos_config *cfg,
 	memset(cfg, 0, sizeof(*cfg));
 
 	cfg-max_vls = opt-max_vls  0 ? opt-max_vls : dflt-max_vls;
-	cfg-vl_high_limit = (uint8_t) opt-high_limit;
+
+	if (opt-high_limit = 0)
+		cfg-vl_high_limit = (uint8_t) 

Re: [ofa-general] Re: [opensm patch][2/2] verify config inputs when config file is rescanned

2008-11-10 Thread Al Chu
Hey Sasha,

Sorry, repost, w/ the right Author.

Al

On Mon, 2008-11-10 at 13:15 -0800, Al Chu wrote:
 On Mon, 2008-11-10 at 23:02 +0200, Sasha Khapyorsky wrote:
  Hi Al,
  
  On 15:01 Thu 30 Oct , Al Chu wrote:
   Hey Sasha,
   
   I noticed that after the config file is rescanned, the new potential
   inputs aren't checked for validity.  Patch is attached.
   
   Al
   
   -- 
   Albert Chu
   [EMAIL PROTECTED]
   Computer Scientist
   High Performance Systems Division
   Lawrence Livermore National Laboratory
  
   From edfcd2de96c3525d1609b4c0f03c17ecc0495c18 Mon Sep 17 00:00:00 2001
   From: root [EMAIL PROTECTED](none)
   Date: Thu, 30 Oct 2008 13:58:55 -0700
   Subject: [PATCH] verify rescanned config input
   
   
   Signed-off-by: root [EMAIL PROTECTED](none)
   
  
  I'm fine with this patch, but could you fix S-O-B line? Thanks.
 
 Oops.  New one is attached (I'll repost the [1/2] patch too).
 
 Al
 
  Sasha
 ___
 general mailing list
 general@lists.openfabrics.org
 http:// lists.openfabrics.org/cgi-bin/mailman/listinfo/general
 
 To unsubscribe, please visit http:// 
 openib.org/mailman/listinfo/openib-general
-- 
Albert Chu
[EMAIL PROTECTED]
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
From a9f7ea0b667ff32a029593e954286c349fe499e7 Mon Sep 17 00:00:00 2001
From: Albert Chu [EMAIL PROTECTED]
Date: Mon, 10 Nov 2008 13:10:25 -0800
Subject: [PATCH] verify rescanned config input


Signed-off-by: Albert Chu [EMAIL PROTECTED]
---
 opensm/opensm/osm_subnet.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
index ab2ff9c..5cf9c33 100644
--- a/opensm/opensm/osm_subnet.c
+++ b/opensm/opensm/osm_subnet.c
@@ -71,6 +71,8 @@
 
 static const char null_str[] = (null);
 
+static void subn_verify_conf_file(IN osm_subn_opt_t * const p_opts);
+
 /**
  **/
 void osm_subn_construct(IN osm_subn_t * const p_subn)
@@ -852,6 +854,8 @@ int osm_subn_rescan_conf_files(IN osm_subn_t * const p_subn)
 	}
 	fclose(opts_file);
 
+	subn_verify_conf_file(&p_subn->opt);
+
 	osm_parse_prefix_routes_file(p_subn);
 
 	return 0;
-- 
1.5.4.5

___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Re: [ofa-general] Re: [opensm patch] support dump_conf command in opensm console

2008-11-10 Thread Al Chu
Hey Sasha,

Sorry.  Repost patch w/ the right Author.

Al

On Mon, 2008-11-10 at 13:03 -0800, Al Chu wrote:
 Hey Sasha,
 
 Attached is the re-worked patch.  Assumes changes from my fix qos
 config parsing bugs patch are accepted.
 
 Al
 
 On Mon, 2008-11-10 at 09:42 -0800, Al Chu wrote:
  Hey Sasha,
  
  On Sun, 2008-11-09 at 19:25 +0200, Sasha Khapyorsky wrote:
   Hi Al,
   
   On 16:39 Mon 03 Nov , Al Chu wrote:
Hey Sasha,

When config files are rescanned and loaded, there's no way to know if
the right configuration was actually reloaded or not.  A console command
to dump the current config is a useful way to verify the loading of new
configs or not.

This patch assumes the fixes from my fix qos config parsing bugs is
accepted.
   
   Didn't pass over it, sorry about delay.
   

Al

-- 
Albert Chu
[EMAIL PROTECTED]
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
   
From 249607e47ec7ef1b92f9578cece90460418d12b8 Mon Sep 17 00:00:00 2001
From: Albert Chu [EMAIL PROTECTED]
Date: Mon, 3 Nov 2008 16:22:29 -0800
Subject: [PATCH] support dump_conf console command


Signed-off-by: Albert Chu [EMAIL PROTECTED]
---
 opensm/opensm/osm_console.c |  158 
+++
 1 files changed, 158 insertions(+), 0 deletions(-)

diff --git a/opensm/opensm/osm_console.c b/opensm/opensm/osm_console.c
index d9bbbc2..8422655 100644
--- a/opensm/opensm/osm_console.c
+++ b/opensm/opensm/osm_console.c
@@ -53,6 +53,10 @@
 #include complib/cl_passivelock.h
 #include opensm/osm_perfmgr.h
 
+#define NULL_STR (null)
+
+#define BOOLEAN_STR(__b) ((__b) ? TRUE : FALSE)
+
 struct command {
char *name;
void (*help_function) (FILE * out, int detail);
@@ -189,6 +193,14 @@ static void help_lidbalance(FILE * out, int detail)
}
 }
 
+static void help_dump_conf(FILE *out, int detail)
+{
+   fprintf(out, dump_conf\n);
+   if (detail) {
+   fprintf(out, dump current opensm configuration\n);
+   }
+}
+
 #ifdef ENABLE_OSM_PERF_MGR
 static void help_perfmgr(FILE * out, int detail)
 {
@@ -1136,6 +1148,151 @@ static void perfmgr_parse(char **p_last, 
osm_opensm_t * p_osm, FILE * out)
 }
 #endif /* ENABLE_OSM_PERF_MGR */
 
+static void dump_qos_options(osm_qos_options_t * opt,
+osm_qos_options_t * dflt, 
+char *prefix,
+FILE * out)
+{
+   fprintf(out, %s_max_vls : %u\n,
+   prefix, opt-max_vls ? opt-max_vls : dflt-max_vls);
+   fprintf(out, %s_high_limit : %u\n,
+   prefix, opt-high_limit = 0 ? 
(unsigned)opt-high_limit : (unsigned)dflt-high_limit);
+   fprintf(out, %s_vlarb_high : %s\n,
+   prefix, opt-vlarb_high ? opt-vlarb_high : 
dflt-vlarb_high);
+   fprintf(out, %s_vlarb_low : %s\n,
+   prefix, opt-vlarb_low ? opt-vlarb_low : 
dflt-vlarb_low);
+   fprintf(out, %s_sl2vl : %s\n,
+   prefix, opt-sl2vl ? opt-sl2vl : dflt-sl2vl);
+}
+
+static void dump_conf_parse(char **p_last, osm_opensm_t * p_osm, FILE 
* out)
+{
   
   Why to not use osm_subn_write_conf_file() function (wrapped by
   dump_conf_parse())? I think we need to have config dumping code
   consolidated.
  
  I had thought of that, but I didn't want all of the instructions and all
  the extra lines of output.  But I guess it's not that big of a deal in
  the end.  I'll send a new patch.
  
  Al
  
   Sasha
   
+   osm_subn_opt_t * opt = p_osm-subn.opt;
+
+   fprintf(out, config_file : %s\n, 
+   opt-config_file ? opt-config_file : NULL_STR);
+   fprintf(out, guid : 0x%016 PRIx64 \n, opt-guid);
+   fprintf(out, m_key : 0x%016 PRIx64 \n, opt-m_key);
+   fprintf(out, sm_key : 0x%016 PRIx64 \n, opt-sm_key);
+   fprintf(out, sa_key : 0x%016 PRIx64 \n, opt-sa_key);
+   fprintf(out, subnet_prefix : 0x%016 PRIx64 \n, 
opt-subnet_prefix);
+   fprintf(out, m_key_lease_period : %u\n, 
opt-m_key_lease_period);
+   fprintf(out, sweep_interval : %u\n, opt-sweep_interval);
+   fprintf(out, max_wire_smps : %u\n, opt-max_wire_smps);
+   fprintf(out, transaction_timeout : %u\n, 
opt-transaction_timeout);
+   fprintf(out, sm_priority : %u\n, opt-sm_priority);
+   fprintf(out, lmc : %u\n, opt-lmc);
+   fprintf(out, lmc_esp0 : %s\n, 
+   BOOLEAN_STR(opt-lmc_esp0));
+   fprintf(out, max_op_vls : %u\n, opt-max_op_vls);
+   fprintf(out, force_link_speed : %u\n, opt-force_link_speed);
+  

[ofa-general] Higher than usual latency (new baby)

2008-11-10 Thread Roland Dreier
Hi everyone,

My wife gave birth to a son on November 6.  Everyone is healthy and
doing well.  But for obvious reasons you should expect me to be a lot
less responsive than usual for the next few weeks.

Thanks,
  Roland
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


Re: [ofa-general] [PATCH] ipoib: null tx/rx_ring skb pointers on free

2008-11-10 Thread Pradeep Satyanarayana
Or Gerlitz wrote:
 Pradeep Satyanarayana wrote:
 If I am not mistaken we saw a problem that showed similar
 characteristics more than two years ago on IBM platforms. The same
 issue of rx_ring reusing tx_ring skbs and so on and would show up only
 under stress. This was with UD mode (before CM came into the picture)
 and it turned out to be a driver issue. 
 Can you send pointer to the relevant thread / commit that solved this
 issue?
Or,

Even though I searched the archives, I could not locate that particular one.
I know that Nam submitted the patch and that it was in the June/July 2006
time frame. It was a missing read memory barrier in the ehca driver. I am
copying him so that he might help.

Pradeep

___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general


[ofa-general] [PATCH] opensm: skeleton for toroidal mesh analysis

2008-11-10 Thread Robert Pearson
Sasha, 

Here is the first patch in a series to implement the algorithm described in
the file lash_changes.doc.

This patch
  - creates a new command line flag --do_mesh_analysis and a new Boolean
that is set if the flag is used.
  - adds code to main to implement the flag and option.
  - creates a new file osm_mesh.c to hold the algorithm code
  - moves declarations from osm_ucast_lash.c and osm_mesh.c into header
files
  - adds these files to Makefile.am
  - adds a stub do_mesh_analysis() that is called from lash_core.

Signed-off-by: Bob Pearson [EMAIL PROTECTED]

-

diff --git a/opensm/include/opensm/osm_mesh.h
b/opensm/include/opensm/osm_mesh.h
new file mode 100644
index 000..1467440
--- /dev/null
+++ b/opensm/include/opensm/osm_mesh.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2008  System Fabric Works, Inc.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials
+ *provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ *  Declarations for mesh analysis
+ */
+
+#ifndef OSM_UCAST_MESH_H
+#define OSM_UCAST_MESH_H
+
+struct _lash;
+
+int do_mesh_analysis(struct _lash *p_lash);
+
+#endif
diff --git a/opensm/include/opensm/osm_subnet.h
b/opensm/include/opensm/osm_subnet.h
index 7259587..2abe36d 100644
--- a/opensm/include/opensm/osm_subnet.h
+++ b/opensm/include/opensm/osm_subnet.h
@@ -215,6 +215,7 @@ typedef struct osm_subn_opt {
char *node_name_map_name;
char *prefix_routes_file;
boolean_t consolidate_ipv6_snm_req;
+   boolean_t do_mesh_analysis;
 } osm_subn_opt_t;
 /*
 * FIELDS
diff --git a/opensm/include/opensm/osm_ucast_lash.h
b/opensm/include/opensm/osm_ucast_lash.h
new file mode 100644
index 000..646e9a3
--- /dev/null
+++ b/opensm/include/opensm/osm_ucast_lash.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2008  System Fabric Works, Inc.
+ * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
+ * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved.
+ * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
+ * Copyright (c) 2007  Simula Research Laboratory. All rights reserved.
+ * Copyright (c) 2007  Silicon Graphics Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *copyright notice, this list of conditions and the following
+ *disclaimer in the documentation and/or other materials
+ *provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * Abstract:
+ *  Declarations for LASH algorithm
+ */
+
+#ifndef OSM_UCAST_LASH_H

[ofa-general] [PATCH] [2] opensm: per mesh data

2008-11-10 Thread Robert Pearson
Sasha,

Here is the second patch implementing the mesh analysis algorithm.

This patch:
  - creates a data structure, mesh_t, that holds per mesh information
  - adds a pointer to this structure in lash_t
  - creates methods to allocate and free memory for mesh_t
  - adds osm_ prefix to global routine names (oops)
  - calls create and cleanup methods

Regards,

Bob Pearson

Signed-off-by: Bob Pearson [EMAIL PROTECTED]

diff --git a/opensm/include/opensm/osm_mesh.h
b/opensm/include/opensm/osm_mesh.h
index 1467440..8313614 100644
--- a/opensm/include/opensm/osm_mesh.h
+++ b/opensm/include/opensm/osm_mesh.h
@@ -41,6 +41,18 @@
 
 struct _lash;
 
-int do_mesh_analysis(struct _lash *p_lash);
+/*
+ * per fabric mesh info
+ */
+typedef struct _mesh {
+   int num_class;  /* number of switch classes */
+   int *class_type;/* index of first switch found for
each class */
+   int *class_count;   /* population of each class */
+   int dimension;  /* mesh dimension */
+   int *size;  /* an array to hold size of mesh */
+} mesh_t;
+
+void osm_mesh_cleanup(struct _lash *p_lash);
+int osm_do_mesh_analysis(struct _lash *p_lash);
 
 #endif
diff --git a/opensm/include/opensm/osm_ucast_lash.h
b/opensm/include/opensm/osm_ucast_lash.h
index 646e9a3..1ae3bb6 100644
--- a/opensm/include/opensm/osm_ucast_lash.h
+++ b/opensm/include/opensm/osm_ucast_lash.h
@@ -95,6 +95,7 @@ typedef struct _lash {
cdg_vertex_t cdg_vertex_matrix;
int *num_mst_in_lane;
int ***virtual_location;
+   mesh_t *mesh;
 } lash_t;
 
 #endif
diff --git a/opensm/opensm/osm_mesh.c b/opensm/opensm/osm_mesh.c
index 7943274..c97925b 100644
--- a/opensm/opensm/osm_mesh.c
+++ b/opensm/opensm/osm_mesh.c
@@ -41,6 +41,7 @@
 #endif /* HAVE_CONFIG_H */
 
 #include stdio.h
+#include stdlib.h
 #include opensm/osm_switch.h
 #include opensm/osm_opensm.h
 #include opensm/osm_log.h
@@ -48,15 +49,72 @@
 #include opensm/osm_ucast_lash.h
 
 /*
+ * osm_mesh_cleanup - free per mesh resources
+ */
+void osm_mesh_cleanup(lash_t *p_lash)
+{
+   mesh_t *mesh = p_lash-mesh;
+
+   if (mesh) {
+   if (mesh-class_type)
+   free(mesh-class_type);
+
+   if (mesh-class_count)
+   free(mesh-class_count);
+
+   free(mesh);
+
+   p_lash-mesh = NULL;
+   }
+}
+
+/*
+ * mesh_create - allocate per mesh resources
+ */
+static int mesh_create(lash_t *p_lash)
+{
+   osm_log_t *p_log = p_lash-p_osm-log;
+   mesh_t *mesh;
+
+   if(!(mesh = p_lash-mesh = calloc(1, sizeof(mesh_t {
+   OSM_LOG(p_log, OSM_LOG_ERROR, Failed allocating mesh - out
of memory\n);
+   return -1;
+   }
+
+   if (!(mesh-class_type = calloc(p_lash-num_switches, sizeof(int
{
+   OSM_LOG(p_log, OSM_LOG_ERROR, Failed allocating
mesh-class_type - out of memory\n);
+   free(mesh);
+   return -1;
+   }
+
+   if (!(mesh-class_count = calloc(p_lash-num_switches,
sizeof(int {
+   OSM_LOG(p_log, OSM_LOG_ERROR, Failed allocating
mesh-class_count - out of memory\n);
+   free(mesh-class_type);
+   free(mesh);
+   return -1;
+   }
+
+   return 0;
+}
+
+/*
  * do_mesh_analysis
  */
-int do_mesh_analysis(lash_t *p_lash)
+int osm_do_mesh_analysis(lash_t *p_lash)
 {
int ret = 0;
osm_log_t *p_log = p_lash-p_osm-log;
 
OSM_LOG_ENTER(p_log);
 
+   /*
+* allocate per mesh data structures
+*/
+   if (mesh_create(p_lash)) {
+   OSM_LOG_EXIT(p_log);
+   return -1;
+   }
+
printf(lash: do_mesh_analysis stub called\n);
 
OSM_LOG_EXIT(p_log);
diff --git a/opensm/opensm/osm_ucast_lash.c b/opensm/opensm/osm_ucast_lash.c
index e10371c..3577cca 100644
--- a/opensm/opensm/osm_ucast_lash.c
+++ b/opensm/opensm/osm_ucast_lash.c
@@ -825,7 +825,7 @@ static int lash_core(lash_t * p_lash)
 
OSM_LOG_ENTER(p_log);
 
-   if (p_lash-p_osm-subn.opt.do_mesh_analysis 
do_mesh_analysis(p_lash)) {
+   if (p_lash-p_osm-subn.opt.do_mesh_analysis 
osm_do_mesh_analysis(p_lash)) {
OSM_LOG(p_log, OSM_LOG_ERROR, Mesh analysis failed\n);
goto Exit;
}
@@ -1124,6 +1124,8 @@ static void lash_cleanup(lash_t * p_lash)
free(p_lash-switches);
}
p_lash-switches = NULL;
+
+   osm_mesh_cleanup(p_lash);
 }
 
 /*
___
general mailing list
general@lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general