On 11/08/15(Tue) 16:53, Mark Kettenis wrote:
> Like ix(4), em(4) hardware doesn't provide an easy/efficient way to
> guarantee alignment of protocol headers for received mbufs.  The
> current code makes an attempt to m_adj() the mbuf if the maximum
> hardware frame size is smaller than the cluster size.  But ever since
> we changed our drivers to always accept jumbo frames if the hardware
> is capable of doing so, this codepath is effectively dead on
> jumbo-capable hardware.  So on jumbo-capable hardware we'll always end
> up patching up the mbuf after the fact, which effectively means we'll
> do a bcopy() of the entire packet on strict alignment architectures.
> 
> The diff below avoids this by taking an approach similar to ix(4).
> This means that on strict alignment architectures, we'll use a larger
> cluster size such that after m_adj()usting the mbuf there are at least
> 2048 bytes for the hardware to play with.  This means we'll waste
> (quite) a bit of memory on those architectures.  But I think it makes
> more sense than copying around all the data in the rx path, especially
> with the direction we've taken with processing received packets in a
> kernel thread that may be running on a different CPU than the
> interrupt handler.  As a bonus, it simplifies the code quite a bit.

I like code simplification and the fact that it uses an approach similar
to ix(4).

> If we ever manage to fix the *8 pool diff, we can reduce the amount of
> wasted memory, by adding a suitably sized mbuf pool on
> strict-alignment architectures.  At least I think that would be
> preferable over the dedicated driver-specific pool that dlg@ proposed
> for ix(4).

This makes sense to me.

>             That is why I specified the size of the cluster as
> 
>   #define EM_MCLBYTES         (EM_RXBUFFER_2048 + ETHER_ALIGN)
> 
> Unfortunately that means that "systat mbuf" output is a bit confusing:
> 
> IFACE             LIVELOCKS  SIZE ALIVE   LWM   HWM   CWM
> System                    0   256    24           4
>                              4096    14          12
> 
> ...
> 
> em0                          2050    14    10   256    14
> 
> 
> See how the mbufs used by em0 are reported as 4096 under "System", but
> 2050 under "em0"?  We could fix that by looking up the right pool, and
> using its item size in the rxr ioctl if people think this is a problem.

How many sparc64 come with em(4)?  Can we assume that the amount of
wasted memory on such system is acceptable?  What about other strict-
alignment architectures?

> Index: if_em.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.c,v
> retrieving revision 1.299
> diff -u -p -r1.299 if_em.c
> --- if_em.c   24 Jun 2015 09:40:54 -0000      1.299
> +++ if_em.c   11 Aug 2015 14:20:10 -0000
> @@ -229,11 +229,6 @@ void em_disable_aspm(struct em_softc *);
>  void em_txeof(struct em_softc *);
>  int  em_allocate_receive_structures(struct em_softc *);
>  int  em_allocate_transmit_structures(struct em_softc *);
> -#ifdef __STRICT_ALIGNMENT
> -void em_realign(struct em_softc *, struct mbuf *, u_int16_t *);
> -#else
> -#define em_realign(a, b, c) /* a, b, c */
> -#endif
>  int  em_rxfill(struct em_softc *);
>  void em_rxeof(struct em_softc *);
>  void em_receive_checksum(struct em_softc *, struct em_rx_desc *,
> @@ -706,7 +701,7 @@ em_ioctl(struct ifnet *ifp, u_long comma
>  
>       case SIOCGIFRXR:
>               error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
> -                 NULL, MCLBYTES, &sc->rx_ring);
> +                 NULL, EM_MCLBYTES, &sc->rx_ring);
>               break;
>  
>       default:
> @@ -2518,14 +2513,15 @@ em_get_buf(struct em_softc *sc, int i)
>               return (ENOBUFS);
>       }
>  
> -     m = MCLGETI(NULL, M_DONTWAIT, NULL, MCLBYTES);
> +     m = MCLGETI(NULL, M_DONTWAIT, NULL, EM_MCLBYTES);
>       if (!m) {
>               sc->mbuf_cluster_failed++;
>               return (ENOBUFS);
>       }
> -     m->m_len = m->m_pkthdr.len = MCLBYTES;
> -     if (sc->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
> -             m_adj(m, ETHER_ALIGN);
> +     m->m_len = m->m_pkthdr.len = EM_MCLBYTES;
> +#ifdef __STRICT_ALIGNMENT
> +     m_adj(m, ETHER_ALIGN);
> +#endif
>  
>       error = bus_dmamap_load_mbuf(sc->rxtag, pkt->map, m, BUS_DMA_NOWAIT);
>       if (error) {
> @@ -2574,8 +2570,8 @@ em_allocate_receive_structures(struct em
>  
>       rx_buffer = sc->rx_buffer_area;
>       for (i = 0; i < sc->num_rx_desc; i++, rx_buffer++) {
> -             error = bus_dmamap_create(sc->rxtag, MCLBYTES, 1,
> -                 MCLBYTES, 0, BUS_DMA_NOWAIT, &rx_buffer->map);
> +             error = bus_dmamap_create(sc->rxtag, EM_MCLBYTES, 1,
> +                 EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &rx_buffer->map);
>               if (error != 0) {
>                       printf("%s: em_allocate_receive_structures: "
>                           "bus_dmamap_create failed; error %u\n",
> @@ -2771,53 +2767,6 @@ em_free_receive_structures(struct em_sof
>       }
>  }
>  
> -#ifdef __STRICT_ALIGNMENT
> -void
> -em_realign(struct em_softc *sc, struct mbuf *m, u_int16_t *prev_len_adj)
> -{
> -     unsigned char tmp_align_buf[ETHER_ALIGN];
> -     int tmp_align_buf_len = 0;
> -
> -     /*
> -      * The Ethernet payload is not 32-bit aligned when
> -      * Jumbo packets are enabled, so on architectures with
> -      * strict alignment we need to shift the entire packet
> -      * ETHER_ALIGN bytes. Ugh.
> -      */
> -     if (sc->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
> -             return;
> -
> -     if (*prev_len_adj > sc->align_buf_len)
> -             *prev_len_adj -= sc->align_buf_len;
> -     else
> -             *prev_len_adj = 0;
> -
> -     if (m->m_len > (MCLBYTES - ETHER_ALIGN)) {
> -             bcopy(m->m_data + (MCLBYTES - ETHER_ALIGN),
> -                 &tmp_align_buf, ETHER_ALIGN);
> -             tmp_align_buf_len = m->m_len -
> -                 (MCLBYTES - ETHER_ALIGN);
> -             m->m_len -= ETHER_ALIGN;
> -     } 
> -
> -     if (m->m_len) {
> -             bcopy(m->m_data, m->m_data + ETHER_ALIGN, m->m_len);
> -             if (!sc->align_buf_len)
> -                     m->m_data += ETHER_ALIGN;
> -     }
> -
> -     if (sc->align_buf_len) {
> -             m->m_len += sc->align_buf_len;
> -             bcopy(&sc->align_buf, m->m_data, sc->align_buf_len);
> -     }
> -
> -     if (tmp_align_buf_len) 
> -             bcopy(&tmp_align_buf, &sc->align_buf, tmp_align_buf_len);
> -
> -     sc->align_buf_len = tmp_align_buf_len;
> -}
> -#endif /* __STRICT_ALIGNMENT */
> -
>  int
>  em_rxfill(struct em_softc *sc)
>  {
> @@ -2944,8 +2893,6 @@ em_rxeof(struct em_softc *sc)
>               if (accept_frame) {
>                       /* Assign correct length to the current fragment */
>                       m->m_len = len;
> -
> -                     em_realign(sc, m, &prev_len_adj); /* STRICT_ALIGN */
>  
>                       if (sc->fmp == NULL) {
>                               m->m_pkthdr.len = m->m_len;
> Index: if_em.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.h,v
> retrieving revision 1.54
> diff -u -p -r1.54 if_em.h
> --- if_em.h   26 Dec 2014 05:46:32 -0000      1.54
> +++ if_em.h   11 Aug 2015 14:20:10 -0000
> @@ -266,6 +266,12 @@ typedef int      boolean_t;
>  #define EM_RXBUFFER_8192     8192
>  #define EM_RXBUFFER_16384    16384
>  
> +#ifdef __STRICT_ALIGNMENT
> +#define EM_MCLBYTES          (EM_RXBUFFER_2048 + ETHER_ALIGN)
> +#else
> +#define EM_MCLBYTES          EM_RXBUFFER_2048
> +#endif
> +
>  #define EM_MAX_SCATTER               64
>  #define EM_TSO_SIZE          65535
>  
> @@ -323,12 +329,6 @@ struct em_softc {
>       struct timeout  em_intr_enable;
>       struct timeout  timer_handle;
>       struct timeout  tx_fifo_timer_handle;
> -
> -#ifdef __STRICT_ALIGNMENT
> -     /* Used for carrying forward alignment adjustments */
> -     unsigned char   align_buf[ETHER_ALIGN]; /* tail of unaligned packet */
> -     u_int8_t        align_buf_len;          /* bytes in tail */
> -#endif /* __STRICT_ALIGNMENT */
>  
>       /* Info about the board itself */
>       u_int32_t       part_num;
> 

Reply via email to