Applied to trunk and branch. Thanks Tzachi
> -----Original Message----- > From: Alex Naslednikov > Sent: Thursday, September 04, 2008 7:40 PM > To: Alex Naslednikov; Tzachi Dar; [email protected] > Cc: Ishai Rabinovitz > Subject: [ofw] [PATCH 3/3] Mlx4 (checksum): improving the > mechanism of csum offload > > This is cumulative patch that fixes all open issues shown the > last week: > SGE wrong calculation > LSO messages in log files > Fixes in ipoib csum offload mechanism > Adding 3 states for CSUM offload : disabled, enabled if > supported by HW and bypass (see inline) > > Signed-off by: xalex (Alexander Naslednikov) > Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_port.c > =================================================================== > --- > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib > _port.c (revision 3106) > +++ > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib > _port.c (revision 3107) > @@ -825,6 +825,8 @@ > uint64_t vaddr; > net32_t rkey; > ib_qp_attr_t qp_attr; > + ib_ca_attr_t * p_ca_attr; > + uint32_t attr_size; > > IPOIB_ENTER( IPOIB_DBG_INIT ); > > @@ -889,7 +891,7 @@ > p_port->p_adapter->p_ifc->get_err_str( > status )) ); > return status; > } > - > + > /* Allocate the QP. */ > cl_memclr( &qp_create, sizeof(qp_create) ); > qp_create.qp_type = IB_QPT_UNRELIABLE_DGRM; @@ -897,8 +899,47 @@ > qp_create.rq_sge = 2; /* To support buffers spanning pages. */ > qp_create.h_rq_cq = p_port->ib_mgr.h_recv_cq; > qp_create.sq_depth = p_port->p_adapter->params.sq_depth; > - //TODO: Figure out the right number of SGE entries for sends. > - qp_create.sq_sge = MAX_SEND_SGE; > + > + //Figure out the right number of SGE entries for sends. > + /* Get the size of the CA attribute structure. */ > + status = p_port->p_adapter->p_ifc->query_ca( > p_port->ib_mgr.h_ca, NULL, &attr_size ); > + if( status != IB_INSUFFICIENT_MEMORY ) > + { > + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, > + ("ib_query_ca failed with status > %s.\n", p_port->p_adapter->p_ifc->get_err_str(status)) ); > + return status; > + } > + > + /* Allocate enough space to store the attribute structure. */ > + p_ca_attr = cl_malloc( attr_size ); > + if( !p_ca_attr ) > + { > + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, > + ("cl_malloc failed to allocate p_ca_attr!\n") ); > + return IB_INSUFFICIENT_RESOURCES; > + } > + > + /* Query the CA attributes. */ > + status = > p_port->p_adapter->p_ifc->query_ca(p_port->ib_mgr.h_ca, > p_ca_attr, &attr_size ); > + if( status != IB_SUCCESS ) > + { > + cl_free( p_ca_attr ); > + > + IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR, > + ("ib_query_ca failed with status > %s.\n", p_port->p_adapter->p_ifc->get_err_str(status)) ); > + return status; > + } > +#define UD_QP_USED_SGE 3 > + qp_create.sq_sge = MAX_SEND_SGE < p_ca_attr->max_sges ? > MAX_SEND_SGE : (p_ca_attr->max_sges - UD_QP_USED_SGE); > + if (!p_ca_attr->ipoib_csum) { > + //checksum is not supported by device > + //user must specify BYPASS to explicitly cancel > checksum calculation > + if > (p_port->p_adapter->params.send_chksum_offload == CSUM_ENABLED) > + > p_port->p_adapter->params.send_chksum_offload = CSUM_DISABLED; > + if > (p_port->p_adapter->params.recv_chksum_offload == CSUM_ENABLED) > + > p_port->p_adapter->params.recv_chksum_offload = CSUM_DISABLED; > + } > + > qp_create.h_sq_cq = p_port->ib_mgr.h_send_cq; > qp_create.sq_signaled = TRUE; > status = p_port->p_adapter->p_ifc->create_qp( > @@ -1937,7 +1978,7 @@ > > } > /* Successful completion. Get the receive > information. */ > - p_desc->ndis_csum.Value = (ULONG) p_wc->csum_ok; > + p_desc->ndis_csum.Value = (ULONG) p_wc->recv.ud.csum_ok; > cl_perf_start( GetRecvEndpts ); > __recv_get_endpts( p_port, p_desc, p_wc, > &p_src, &p_dst ); > cl_perf_stop( &p_port->p_adapter->perf, > GetRecvEndpts ); @@ -2580,10 +2621,22 @@ > ("__buf_mgr_get_ndis_pkt failed\n") ); > return IB_INSUFFICIENT_RESOURCES; > } > - > + if (p_port->p_adapter->params.recv_chksum_offload != > CSUM_BYPASS) { > /* Get the checksums directly from packet information. */ > - NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet, > TcpIpChecksumPacketInfo ) = > - (PVOID) (uintn_t) (p_desc->ndis_csum.Value); > + /* In this case, no one of cheksum's cat get false value */ > + /* If hardware checksum failed or wasn't calculated, > NDIS will recalculate it again */ > + NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet, > TcpIpChecksumPacketInfo ) = > + (PVOID) (uintn_t) (p_desc->ndis_csum.Value); > + } else { > + NDIS_TCP_IP_CHECKSUM_PACKET_INFO chksum; > + /* Flag the checksums as having been calculated. */ > + chksum.Value = 0; > + chksum.Receive.NdisPacketTcpChecksumSucceeded = TRUE; > + chksum.Receive.NdisPacketUdpChecksumSucceeded = TRUE; > + chksum.Receive.NdisPacketIpChecksumSucceeded = TRUE; > + NDIS_PER_PACKET_INFO_FROM_PACKET( *pp_packet, > TcpIpChecksumPacketInfo ) = > + (void*)(uintn_t)chksum.Value; > + } > ipoib_inc_recv_stat( p_port->p_adapter, type, p_desc->len ); > > IPOIB_EXIT( IPOIB_DBG_RECV ); > @@ -3833,9 +3886,9 @@ > p_desc->wr.dgrm.ud.hlen = lso_header_size; > // Tell NDIS how much we will send. > PktExt->NdisPacketInfo[TcpLargeSendPacketInfo] > = UlongToPtr(PacketLength); > - p_desc->wr.send_opt |= (IB_SEND_OPT_TX_IP_CSUM > | IB_SEND_OPT_TX_TCP_UDP_CSUM); > + p_desc->wr.send_opt |= (IB_SEND_OPT_TX_IP_CSUM | > +IB_SEND_OPT_TX_TCP_UDP_CSUM) | IB_SEND_OPT_SIGNALED; > __send_gen(p_port, p_desc, IndexOfData); > - p_desc->wr.wr_type = ( WR_LSO | IB_SEND_OPT_SIGNALED); > + p_desc->wr.wr_type = WR_LSO; > } else { > > /* Setup the first local data segment (used for > the IPoIB header). */ > Index: > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_adapter.h > =================================================================== > --- > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_ad > apter.h (revision 3106) > +++ > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_ad > apter.h (revision 3107) > @@ -60,15 +60,15 @@ > /* > * Macros > */ > +typedef enum {CSUM_DISABLED = 0, CSUM_ENABLED, CSUM_BYPASS} > +tCsumTypeEn; > > - > typedef struct _ipoib_params > { > int32_t rq_depth; > int32_t rq_low_watermark; > int32_t sq_depth; > - boolean_t send_chksum_offload; > - boolean_t recv_chksum_offload; > + int32_t send_chksum_offload; //is actually of > type tCsumTypeEn > + int32_t recv_chksum_offload; //is actually of > type tCsumTypeEn > uint32_t sa_timeout; > uint32_t sa_retry_cnt; > uint32_t recv_pool_ratio; > @@ -91,12 +91,11 @@ > * Number of send WQEs to allocate. > * > * send_chksum_offload > -* Flag to indicate whether to offload send > checksums. This will make it > -* so that IPoIB packets should never be forwarded > out of the IB subnet > -* without recalculating the checksum. > -* > * recv_chksum_offload > -* Flag to indicate whether to offload recv checksums. > +* Flags to indicate whether to offload send/recv > checksums. > +* 0 - No hardware cheksum > +* 1 - Try to offload if the device support it > +* 2 - Always report success (checksum bypass) > * > * wsdp_enabled > * Flag to indicate whether WSDP is enabled for an > adapter adapter. > Index: > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_driver.c > =================================================================== > --- > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_d > river.c (revision 3106) > +++ > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/ipoib_d > river.c (revision 3107) > @@ -150,8 +150,8 @@ > {NDIS_STRING_CONST("RqDepth"), 1, > IPOIB_OFFSET(rq_depth), IPOIB_SIZE(rq_depth), > 512, 128, 1024}, > {NDIS_STRING_CONST("RqLowWatermark"), 0, > IPOIB_OFFSET(rq_low_watermark), > IPOIB_SIZE(rq_low_watermark), 4, 2, 8}, > {NDIS_STRING_CONST("SqDepth"), 1, > IPOIB_OFFSET(sq_depth), IPOIB_SIZE(sq_depth), > 512, 128, 1024}, > - {NDIS_STRING_CONST("SendChksum"), 1, > IPOIB_OFFSET(send_chksum_offload), > IPOIB_SIZE(send_chksum_offload),0, 0, 1}, > - {NDIS_STRING_CONST("RecvChksum"), 1, > IPOIB_OFFSET(recv_chksum_offload), > IPOIB_SIZE(recv_chksum_offload),0, 0, 1}, > + {NDIS_STRING_CONST("SendChksum"), 1, > IPOIB_OFFSET(send_chksum_offload), > IPOIB_SIZE(send_chksum_offload),CSUM_ENABLED,CSUM_DISABLED,CSU > M_BYPASS}, > + {NDIS_STRING_CONST("RecvChksum"), 1, > IPOIB_OFFSET(recv_chksum_offload), > IPOIB_SIZE(recv_chksum_offload),CSUM_ENABLED,CSUM_DISABLED,CSU > M_BYPASS}, > {NDIS_STRING_CONST("SaTimeout"), 1, > IPOIB_OFFSET(sa_timeout), IPOIB_SIZE(sa_timeout), > 1000, 250, UINT_MAX}, > {NDIS_STRING_CONST("SaRetries"), 1, > IPOIB_OFFSET(sa_retry_cnt), > IPOIB_SIZE(sa_retry_cnt), 10, 1, UINT_MAX}, > {NDIS_STRING_CONST("RecvRatio"), 1, > IPOIB_OFFSET(recv_pool_ratio), > IPOIB_SIZE(recv_pool_ratio), 1, 1, 10}, > @@ -1450,28 +1450,20 @@ > p_offload_task->TaskBufferLength = > sizeof(NDIS_TASK_TCP_IP_CHECKSUM); > p_offload_chksum = > (NDIS_TASK_TCP_IP_CHECKSUM*)p_offload_task->TaskBuffer; > - > + > p_offload_chksum->V4Transmit.IpOptionsSupported = > - p_adapter->params.send_chksum_offload; > p_offload_chksum->V4Transmit.TcpOptionsSupported = > - p_adapter->params.send_chksum_offload; > p_offload_chksum->V4Transmit.TcpChecksum = > - p_adapter->params.send_chksum_offload; > p_offload_chksum->V4Transmit.UdpChecksum = > - p_adapter->params.send_chksum_offload; > p_offload_chksum->V4Transmit.IpChecksum = > - p_adapter->params.send_chksum_offload; > + !!(p_adapter->params.send_chksum_offload); > > p_offload_chksum->V4Receive.IpOptionsSupported = > - p_adapter->params.recv_chksum_offload; > p_offload_chksum->V4Receive.TcpOptionsSupported = > - p_adapter->params.recv_chksum_offload; > p_offload_chksum->V4Receive.TcpChecksum = > - p_adapter->params.recv_chksum_offload; > p_offload_chksum->V4Receive.UdpChecksum = > - p_adapter->params.recv_chksum_offload; > p_offload_chksum->V4Receive.IpChecksum = > - p_adapter->params.recv_chksum_offload; > + !!(p_adapter->params.recv_chksum_offload); > > p_offload_chksum->V6Transmit.IpOptionsSupported = FALSE; > p_offload_chksum->V6Transmit.TcpOptionsSupported = FALSE; > Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netipoib.inx > =================================================================== > --- > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netip > oib.inx (revision 3106) > +++ > D:/Windows/MLNX_WINOF_CSUM_ARBEL/ulp/ipoib/kernel/netip > oib.inx (revision 3107) > @@ -95,18 +95,28 @@ > > HKR, Ndi\Params\SendChksum, ParamDesc, 0, > "Send Checksum Offload" > HKR, Ndi\Params\SendChksum, Type, 0, "enum" > -HKR, Ndi\Params\SendChksum, Default, 0, "0" > +HKR, Ndi\Params\SendChksum, Default, 0, "1" > HKR, Ndi\Params\SendChksum, Optional, 0, "0" > HKR, Ndi\Params\SendChksum\enum, "0", 0, "Disabled" > -HKR, Ndi\Params\SendChksum\enum, "1", 0, "Enabled" > +HKR, Ndi\Params\SendChksum\enum, "1", 0, "Enabled (if > supported by HW)" > +HKR, Ndi\Params\SendChksum\enum, "2", 0, "Bypass" > > HKR, Ndi\Params\RecvChksum, ParamDesc, 0, > "Recv Checksum Offload" > HKR, Ndi\Params\RecvChksum, Type, 0, "enum" > -HKR, Ndi\Params\RecvChksum, Default, 0, "0" > +HKR, Ndi\Params\RecvChksum, Default, 0, "1" > HKR, Ndi\Params\RecvChksum, Optional, 0, "0" > HKR, Ndi\Params\RecvChksum\enum, "0", 0, "Disabled" > -HKR, Ndi\Params\RecvChksum\enum, "1", 0, "Enabled" > +HKR, Ndi\Params\RecvChksum\enum, "1", 0, "Enabled (if > supported by HW)" > +HKR, Ndi\Params\RecvChksum\enum, "2", 0, "Bypass" > > +HKR, Ndi\Params\lso, ParamDesc, 0, "Large Send Offload" > +HKR, Ndi\Params\lso, Type, 0, "enum" > +HKR, Ndi\Params\lso, Default, 0, "0" > +HKR, Ndi\Params\lso, Optional, 0, "0" > +HKR, Ndi\Params\lso\enum, "0", 0, "Disabled" > +HKR, Ndi\Params\lso\enum, "1", 0, "Enabled" > + > + > HKR, Ndi\Params\SaTimeout, ParamDesc, 0, "SA > Query Timeout (ms)" > HKR, Ndi\Params\SaTimeout, Type, 0, "dword" > HKR, Ndi\Params\SaTimeout, Default, 0, "1000" > Index: D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c > =================================================================== > --- > D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c > (revision 3106) > +++ > D:/Windows/MLNX_WINOF_CSUM_ARBEL/hw/mlx4/kernel/bus/ib/cq.c > (revision 3107) > @@ -498,7 +498,7 @@ > wc->recv.ud.path_bits = > (u8)(cqe->g_mlpath_rqpn & 0x7f); > wc->recv.ud.recv_opt |= > cqe->g_mlpath_rqpn & 0x080 ? IB_RECV_OPT_GRH_VALID : 0; > wc->recv.ud.pkey_index = > (u16)(be32_to_cpu(cqe->immed_rss_invalid) & 0x7f); > - wc->csum_ok = > mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,cqe->checksum); > + wc->recv.ud.csum_ok = > +mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,cqe->checksum); > } > if (!is_send && cqe->rlid == 0){ > > MLX4_PRINT(TRACE_LEVEL_INFORMATION,MLX4_DBG_CQ,("found rlid > == 0 \n ")); _______________________________________________ ofw mailing list [email protected] http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw
