On 11/08/15(Tue) 16:53, Mark Kettenis wrote:
> Like ix(4), em(4) hardware doesn't provide an easy/efficient way to
> guarantee alignment of protocol headers for received mbufs. The
> current code makes an attempt to m_adj() the mbuf if the maximum
> hardware frame size is smaller than the cluster size. But ever since
> we changed our drivers to always accept jumbo frames if the hardware
> is capable of doing so, this codepath is effectively dead on
> jumbo-capable hardware. So on jumbo-capable hardware we'll always end
> up patching up the mbuf after the fact, which effectively means we'll
> do a bcopy() of the entire packet on strict alignment architectures.
>
> The diff below avoids this by taking an approach similar to ix(4).
> This means that on strict alignment architectures, we'll use a larger
> cluster size such that after m_adj()usting the mbuf there are at least
> 2048 bytes for the hardware to play with. This means we'll waste
> (quite) a bit of memory on those architectures. But I think it makes
> more sense than copying around all the data in the rx path, especially
> with the direction we've taken with processing received packets in a
> kernel thread that may be running on a different CPU than the
> interrupt handler. As a bonus, it simplifies the code quite a bit.
I like code simplification and the fact that it uses an approach similar
to ix(4).
> If we ever manage to fix the *8 pool diff, we can reduce the amount of
> wasted memory, by adding a suitably sized mbuf pool on
> strict-alignment architectures. At least I think that would be
> preferable over the dedicated driver-specific pool that dlg@ proposed
> for ix(4).
This makes sense to me.
> That is why I specified the size of the cluster as
>
> #define EM_MCLBYTES (EM_RXBUFFER_2048 + ETHER_ALIGN)
>
> Unfortunately that means that "systat mbuf" output is a bit confusing:
>
> IFACE LIVELOCKS SIZE ALIVE LWM HWM CWM
> System 0 256 24 4
> 4096 14 12
>
> ...
>
> em0 2050 14 10 256 14
>
>
> See how the mbufs used by em0 are reported as 4096 under "System", but
> 2050 under "em0"? We could fix that by looking up the right pool, and
> using its item size in the rxr ioctl if people think this is a problem.
How many sparc64 come with em(4)? Can we assume that the amount of
wasted memory on such system is acceptable? What about other strict-
alignment architectures?
> Index: if_em.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.c,v
> retrieving revision 1.299
> diff -u -p -r1.299 if_em.c
> --- if_em.c 24 Jun 2015 09:40:54 -0000 1.299
> +++ if_em.c 11 Aug 2015 14:20:10 -0000
> @@ -229,11 +229,6 @@ void em_disable_aspm(struct em_softc *);
> void em_txeof(struct em_softc *);
> int em_allocate_receive_structures(struct em_softc *);
> int em_allocate_transmit_structures(struct em_softc *);
> -#ifdef __STRICT_ALIGNMENT
> -void em_realign(struct em_softc *, struct mbuf *, u_int16_t *);
> -#else
> -#define em_realign(a, b, c) /* a, b, c */
> -#endif
> int em_rxfill(struct em_softc *);
> void em_rxeof(struct em_softc *);
> void em_receive_checksum(struct em_softc *, struct em_rx_desc *,
> @@ -706,7 +701,7 @@ em_ioctl(struct ifnet *ifp, u_long comma
>
> case SIOCGIFRXR:
> error = if_rxr_ioctl((struct if_rxrinfo *)ifr->ifr_data,
> - NULL, MCLBYTES, &sc->rx_ring);
> + NULL, EM_MCLBYTES, &sc->rx_ring);
> break;
>
> default:
> @@ -2518,14 +2513,15 @@ em_get_buf(struct em_softc *sc, int i)
> return (ENOBUFS);
> }
>
> - m = MCLGETI(NULL, M_DONTWAIT, NULL, MCLBYTES);
> + m = MCLGETI(NULL, M_DONTWAIT, NULL, EM_MCLBYTES);
> if (!m) {
> sc->mbuf_cluster_failed++;
> return (ENOBUFS);
> }
> - m->m_len = m->m_pkthdr.len = MCLBYTES;
> - if (sc->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
> - m_adj(m, ETHER_ALIGN);
> + m->m_len = m->m_pkthdr.len = EM_MCLBYTES;
> +#ifdef __STRICT_ALIGNMENT
> + m_adj(m, ETHER_ALIGN);
> +#endif
>
> error = bus_dmamap_load_mbuf(sc->rxtag, pkt->map, m, BUS_DMA_NOWAIT);
> if (error) {
> @@ -2574,8 +2570,8 @@ em_allocate_receive_structures(struct em
>
> rx_buffer = sc->rx_buffer_area;
> for (i = 0; i < sc->num_rx_desc; i++, rx_buffer++) {
> - error = bus_dmamap_create(sc->rxtag, MCLBYTES, 1,
> - MCLBYTES, 0, BUS_DMA_NOWAIT, &rx_buffer->map);
> + error = bus_dmamap_create(sc->rxtag, EM_MCLBYTES, 1,
> + EM_MCLBYTES, 0, BUS_DMA_NOWAIT, &rx_buffer->map);
> if (error != 0) {
> printf("%s: em_allocate_receive_structures: "
> "bus_dmamap_create failed; error %u\n",
> @@ -2771,53 +2767,6 @@ em_free_receive_structures(struct em_sof
> }
> }
>
> -#ifdef __STRICT_ALIGNMENT
> -void
> -em_realign(struct em_softc *sc, struct mbuf *m, u_int16_t *prev_len_adj)
> -{
> - unsigned char tmp_align_buf[ETHER_ALIGN];
> - int tmp_align_buf_len = 0;
> -
> - /*
> - * The Ethernet payload is not 32-bit aligned when
> - * Jumbo packets are enabled, so on architectures with
> - * strict alignment we need to shift the entire packet
> - * ETHER_ALIGN bytes. Ugh.
> - */
> - if (sc->hw.max_frame_size <= (MCLBYTES - ETHER_ALIGN))
> - return;
> -
> - if (*prev_len_adj > sc->align_buf_len)
> - *prev_len_adj -= sc->align_buf_len;
> - else
> - *prev_len_adj = 0;
> -
> - if (m->m_len > (MCLBYTES - ETHER_ALIGN)) {
> - bcopy(m->m_data + (MCLBYTES - ETHER_ALIGN),
> - &tmp_align_buf, ETHER_ALIGN);
> - tmp_align_buf_len = m->m_len -
> - (MCLBYTES - ETHER_ALIGN);
> - m->m_len -= ETHER_ALIGN;
> - }
> -
> - if (m->m_len) {
> - bcopy(m->m_data, m->m_data + ETHER_ALIGN, m->m_len);
> - if (!sc->align_buf_len)
> - m->m_data += ETHER_ALIGN;
> - }
> -
> - if (sc->align_buf_len) {
> - m->m_len += sc->align_buf_len;
> - bcopy(&sc->align_buf, m->m_data, sc->align_buf_len);
> - }
> -
> - if (tmp_align_buf_len)
> - bcopy(&tmp_align_buf, &sc->align_buf, tmp_align_buf_len);
> -
> - sc->align_buf_len = tmp_align_buf_len;
> -}
> -#endif /* __STRICT_ALIGNMENT */
> -
> int
> em_rxfill(struct em_softc *sc)
> {
> @@ -2944,8 +2893,6 @@ em_rxeof(struct em_softc *sc)
> if (accept_frame) {
> /* Assign correct length to the current fragment */
> m->m_len = len;
> -
> - em_realign(sc, m, &prev_len_adj); /* STRICT_ALIGN */
>
> if (sc->fmp == NULL) {
> m->m_pkthdr.len = m->m_len;
> Index: if_em.h
> ===================================================================
> RCS file: /cvs/src/sys/dev/pci/if_em.h,v
> retrieving revision 1.54
> diff -u -p -r1.54 if_em.h
> --- if_em.h 26 Dec 2014 05:46:32 -0000 1.54
> +++ if_em.h 11 Aug 2015 14:20:10 -0000
> @@ -266,6 +266,12 @@ typedef int boolean_t;
> #define EM_RXBUFFER_8192 8192
> #define EM_RXBUFFER_16384 16384
>
> +#ifdef __STRICT_ALIGNMENT
> +#define EM_MCLBYTES (EM_RXBUFFER_2048 + ETHER_ALIGN)
> +#else
> +#define EM_MCLBYTES EM_RXBUFFER_2048
> +#endif
> +
> #define EM_MAX_SCATTER 64
> #define EM_TSO_SIZE 65535
>
> @@ -323,12 +329,6 @@ struct em_softc {
> struct timeout em_intr_enable;
> struct timeout timer_handle;
> struct timeout tx_fifo_timer_handle;
> -
> -#ifdef __STRICT_ALIGNMENT
> - /* Used for carrying forward alignment adjustments */
> - unsigned char align_buf[ETHER_ALIGN]; /* tail of unaligned packet */
> - u_int8_t align_buf_len; /* bytes in tail */
> -#endif /* __STRICT_ALIGNMENT */
>
> /* Info about the board itself */
> u_int32_t part_num;
>