I updated the HCA "InfiniBand: Mellanox Technologies: Unknown device 634a (rev a0)" to the latest firmware and the issue remains. "fw_ver" is now 2.6.000.
Any ideas on why the timeout is occurring in the following function? 203<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l203>static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, 204<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l204> int out_is_imm, u32 in_modifier, u8 op_modifier, 205<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l205> u16 op, unsigned long timeout) 206<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l206>{ 207<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l207> struct mlx4_priv *priv = mlx4_priv(dev); 208<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l208> void __iomem *hcr = priv->cmd.hcr; 209<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l209> int err = 0; 210<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l210> unsigned long end; 211<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l211> 212<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l212> down(&priv->cmd.poll_sem); 
213<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l213> 214<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l214> err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, 215<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l215> in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); 216<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l216> if (err) 217<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l217> goto out; 218<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l218> 219<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l219> end = msecs_to_jiffies(timeout) + jiffies; 220<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l220> while (cmd_pending(dev) && time_before(jiffies, end)) 221<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l221> cond_resched(); 222<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l222> 
223<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l223> if (cmd_pending(dev)) { 224<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l224> err = - ETIMEDOUT; 225<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l225> goto out; 226<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l226> } 227<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l227> 228<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l228> if (out_is_imm) 229<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l229> *out_param = 230<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l230> (u64) be32_to_cpu((__force __be32) 231<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l231> __raw_readl(hcr + HCR_OUT_PARAM_OFFSET)) << 32 | 232<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l232> (u64) be32_to_cpu((__force __be32) 233<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l233> 
__raw_readl(hcr + HCR_OUT_PARAM_OFFSET + 4)); 234<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l234> 235<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l235> err = mlx4_status_to_errno(be32_to_cpu((__force __be32) 236<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l236> __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24); 237<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l237> 238<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l238>out: 239<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l239> up(&priv->cmd.poll_sem); 240<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l240> return err; 241<http://git.kernel.org/?p=linux/kernel/git/stable/linux-2.6.24.y.git;a=blob;f=drivers/net/mlx4/cmd.c;h=db49051b97b168b7a004e52c865bef94311ce56a;hb=master#l241>} On Sat, Mar 7, 2009 at 12:55 AM, Dotan Barak <[email protected]> wrote: > Please make sure that you have the latest firmware for the HCA that has the > failure. > > Dotan > > > Phillip Wilson wrote: > >> This is related to the thread I stared on Feb 24 >> >> Re: [ofa-general] ***SPAM*** Mellanox ibv_reg_mr (memory region) function >> call fails under load when using the mlx4 driver >> So far I have modified the "num_mtt" to 1 << 21, as Dotan suggested, but >> I will try some more values this weekend. 
>> I think the code for this is in ../drivers/net/mlx4/cmd.c mlx4_cmd( ... >> ) >> -ETIMEOUT > System Information: >> > >> > >> > >> > The system has 4GB of memory. >> > >> > >> > >> > uname -a >> > >> > Linux (none) 2.6.24.02.02.08 #21 SMP Thu Feb 19 11:04:35 PST 2009 ia64 >> > unknown >> > >> > >> > >> > OFED 1.2.5 >> > >> > >> > >> > lspci -d 15b3: >> > >> > >> > >> > 0000:10:00.0 InfiniBand: Mellanox Technologies MT25208 InfiniHost III Ex >> > (Tavor compatibility mode) (rev 20) >> > >> > 0000:c3:00.0 InfiniBand: Mellanox Technologies: Unknown device 634a (rev >> a0) >> > >> > >> > >> > lspci -d 15b3: -n >> > >> > 0000:10:00.0 0c06: 15b3:6278 (rev 20) >> > >> > 0000:c3:00.0 0c06: 15b3:634a (rev a0) >> > >> > >> > >> > ibv_devinfo -v >> > >> > hca_id: mlx4_0 >> > >> > fw_ver: 2.5.000 >> > >> > >> > >> > hca_id: mthca0 >> > >> > fw_ver: 4.8.930 >> > >
_______________________________________________ general mailing list [email protected] http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
