On 11.12.2015 17:07, Ion Grigore wrote: > Comments inlined > > -----Original Message----- > From: Maxim Uvarov [mailto:maxim.uva...@linaro.org] > Sent: Friday, December 11, 2015 11:20 AM > To: Ilya Maximets <i.maxim...@samsung.com>; Grigore Ion-B17953 > <ion.grig...@freescale.com>; lng-odp@lists.linaro.org > Subject: Re: [PATCHv6] helper : Fix UDP checksum computation > > On 12/11/2015 10:39, Ilya Maximets wrote: >> Comments inlined. >> >> On 10.12.2015 12:59, ion.grig...@freescale.com wrote: >>> From: Grigore Ion <ion.grig...@freescale.com> >>> >>> This patch fixes the following problems: >>> - checksum computation for LE platforms >>> - checksum is computed in the CPU endianness. The returned result >>> must be converted to the BE ordering when it is used to update the >>> UDP checksum in a packet. >>> - checksum computation for packets having the UDP length not a >>> multiple of 2 >>> - fixes the UDP checksum associated validation test >>> >>> Signed-off-by: Grigore Ion <ion.grig...@freescale.com> >>> --- >>> v6: >>> - Make code more understandable (Ilya Maximets) >>> v5: >>> - Checksum in CPU endianness fix added (Ilya Maximets) >>> v4: >>> - Verify checksum in CPU endianness in the associated test >>> (Ilya Maximets) >>> v3: >>> - fix the UDP checksum computation in the associated test >>> (Maxim Uvarov) >>> v2: >>> - patch updated to the last master (Maxim Uvarov) >>> v1: >>> - Move variables declaration on top of block. (Maxim Uvarov) >>> - Check patch with checkpatch script. (Maxim Uvarov) >>> - L3 header presence is tested twice. (Alexandru Badicioiu) >>> - Remove unnecessary check for L3 header presence. (Bill Fischofer) >>> - Modify check of odp_packet_l4_offset() return. (Bill Fischofer) >>> >>> helper/include/odp/helper/udp.h | 81 >>> +++++++++++++++++++++++++---------------- >>> helper/test/odp_chksum.c | 4 +- >>> 2 files changed, 51 insertions(+), 34 deletions(-) >>> >>> diff --git a/helper/include/odp/helper/udp.h >>> b/helper/include/odp/helper/udp.h index 06c439b..d0599b1 100644 >>> --- a/helper/include/odp/helper/udp.h >>> +++ b/helper/include/odp/helper/udp.h >>> @@ -4,7 +4,6 @@ >>> * SPDX-License-Identifier: BSD-3-Clause >>> */ >>> >>> - >>> /** >>> * @file >>> * >>> @@ -22,7 +21,6 @@ extern "C" { >>> #include <odp/debug.h> >>> #include <odp/byteorder.h> >>> >>> - >>> /** @addtogroup odph_header ODPH HEADER >>> * @{ >>> */ >>> @@ -39,51 +37,70 @@ typedef struct ODP_PACKED { >>> } odph_udphdr_t; >>> >>> /** >>> + * Perform byte swap required by UDP checksum computation algorithm >>> +*/ static inline uint16_t csum_bswap(uint16_t val) { #if >>> +ODP_BYTE_ORDER == ODP_LITTLE_ENDIAN >>> + val = __odp_builtin_bswap16((__odp_force uint16_t)val); #endif >>> + return val; >>> +} >> Please, don't duplicate the code. >> >>> + >>> +/** >>> + * Pad a byte value to the left or to the right as required by UDP >>> +checksum >>> + * computation algorithm and convert the result to CPU native uint16_t. >>> + * Left padding is performed for the IP protocol field in the UDP >>> + * pseudo-header (RFC 768). Right padding is performed in the case >>> +of the odd >>> + * byte in a UDP packet having the length not a 2 multiple. >>> + */ >>> +static inline uint16_t u8_pad_to_u16(uint8_t val, odp_bool_t left) { >>> + uint16_t ret; >>> + >>> + ret = (left) ? val : val << 8; >>> + return csum_bswap(ret); >>> +} >>> + >>> +/** >>> * UDP checksum >>> * >>> * This function uses odp packet to calc checksum >>> * >>> * @param pkt calculate chksum for pkt >>> - * @return checksum value >>> + * @return checksum value in CPU endianness >>> */ >>> static inline uint16_t odph_ipv4_udp_chksum(odp_packet_t pkt) >>> { >>> - uint32_t sum = 0; >>> - odph_udphdr_t *udph; >>> - odph_ipv4hdr_t *iph; >>> - uint16_t udplen; >>> - uint8_t *buf; >>> - >>> - if (!odp_packet_l3_offset(pkt)) >>> - return 0; >>> + odph_ipv4hdr_t *iph; >>> + odph_udphdr_t *udph; >>> + uint32_t sum; >>> + uint16_t udplen, *buf; >>> >>> - if (!odp_packet_l4_offset(pkt)) >>> + if (odp_packet_l4_offset(pkt) == ODP_PACKET_OFFSET_INVALID) >>> return 0; >>> - >>> iph = (odph_ipv4hdr_t *)odp_packet_l3_ptr(pkt, NULL); >>> udph = (odph_udphdr_t *)odp_packet_l4_ptr(pkt, NULL); >>> - udplen = odp_be_to_cpu_16(udph->length); >>> - >>> - /* 32-bit sum of all 16-bit words covered by UDP chksum */ >>> + /* 32-bit sum of UDP pseudo-header */ >>> sum = (iph->src_addr & 0xFFFF) + (iph->src_addr >> 16) + >>> - (iph->dst_addr & 0xFFFF) + (iph->dst_addr >> 16) + >>> - (uint16_t)iph->proto + udplen; >>> - for (buf = (uint8_t *)udph; udplen > 1; udplen -= 2) { >>> - sum += ((*buf << 8) + *(buf + 1)); >>> - buf += 2; >>> - } >>> - >>> - /* Fold sum to 16 bits: add carrier to result */ >>> - while (sum >> 16) >>> - sum = (sum & 0xFFFF) + (sum >> 16); >>> - >>> + (iph->dst_addr & 0xFFFF) + (iph->dst_addr >> 16) + >>> + u8_pad_to_u16(iph->proto, 1) + udph->length; >> I meant something like: >> >> union { >> uint8_t v8[2]; >> uint16_t v16; >> } val; >> >> val.v8[0] = 0; >> val.v8[1] = iph->proto; >> > > The val.val16 must be swapped on LE platforms.
NO. > > >> sum = (iph->src_addr & 0xFFFF) + (iph->src_addr >> 16) + >> (iph->dst_addr & 0xFFFF) + (iph->dst_addr >> 16) + >> val.v16 + udph->length; >> >>> + udplen = odp_be_to_cpu_16(udph->length); >>> + buf = (uint16_t *)((void *)udph); >>> + /* 32-bit sum of UDP header (checksum field cleared) and UDP data */ >>> + for ( ; udplen > 1; udplen -= 2) >>> + sum += *buf++; >>> + /* Length is not a multiple of 2 bytes */ >>> + if (udplen) >>> + sum += u8_pad_to_u16(*buf, 0); >> val.v8[0] = *buf; >> val.v8[1] = 0; >> >> sum += val.v16; >> >> Isn't it more understandable, then artificial byte swapping? >> > > The val.val16 must be swapped on LE platforms. NO. > > Also see the the line : > return (sum == 0x0) ? 0xFFFF : csum_bswap(sum); > > Returned value must be swapped on LE platforms. Yes. That is normal. > >> Maxim, what do you think? > I like more solution with union I. > > It would be good to take this check sum calculation from some well maintained > place. > UDP check sum is crc algorithm right? > > FreeBSD has generic portable code: > https://github.com/freebsd/freebsd/blob/master/sys/netinet/in_cksum.c > And each arch has optimized code. > ARM64: > https://github.com/freebsd/freebsd/blob/master/sys/arm64/arm64/in_cksum.c > > In api-next we have odp_hash_crc32() function. If we reuse that odp api than > platform can accelerate it. What do you think? > > Maxim. > > >> >>> + /* Fold sum to 16 bits */ >>> + sum = (sum & 0xFFFF) + (sum >> 16); >>> + /* Add carrier (0/1) to result */ >>> + sum += (sum >> 16); >>> /* 1's complement */ >>> sum = ~sum; >>> - >>> - /* set computation result */ >>> - sum = (sum == 0x0) ? 0xFFFF : sum; >>> - >>> - return sum; >>> + /* Set computation result in CPU endianness */ >>> + return (sum == 0x0) ? 0xFFFF : csum_bswap(sum); >>> } >>> >>> /** @internal Compile time assert */ diff --git >>> a/helper/test/odp_chksum.c b/helper/test/odp_chksum.c index >>> 1d417a8..152018a 100644 >>> --- a/helper/test/odp_chksum.c >>> +++ b/helper/test/odp_chksum.c >>> @@ -189,14 +189,14 @@ int main(int argc TEST_UNUSED, char *argv[] >>> TEST_UNUSED) >>> udp->dst_port = 0; >>> udp->length = odp_cpu_to_be_16(udat_size + ODPH_UDPHDR_LEN); >>> udp->chksum = 0; >>> - udp->chksum = odph_ipv4_udp_chksum(test_packet); >>> + udp->chksum = odp_cpu_to_be_16(odph_ipv4_udp_chksum(test_packet)); >>> >>> if (udp->chksum == 0) >>> return -1; >>> >>> printf("chksum = 0x%x\n", udp->chksum); >>> >>> - if (udp->chksum != 0xab2d) >>> + if (odp_be_to_cpu_16(udp->chksum) != 0x7e5a) >>> status = -1; >>> >>> odp_packet_free(test_packet); >>> > _______________________________________________ lng-odp mailing list lng-odp@lists.linaro.org https://lists.linaro.org/mailman/listinfo/lng-odp