zijianzhang@ wrote:
> From: Zijian Zhang <[email protected]>
> 
> We update selftests/net/msg_zerocopy.c to accommodate the new mechanism,

Please make commit messages stand on their own. If someone does a git
blame, make the message self explanatory. Replace "the new mechanism"
with sendmsg SCM_ZC_NOTIFICATION.

In patch 2 or as a separate patch 4, also add a new short section on
this API in Documentation/networking/msg_zerocopy.rst. Probably with
the same contents as a good explanation of the feature in the commit
message of patch 2.

> cfg_notification_limit has the same semantics for both methods. Test
> results are as follows, we update skb_orphan_frags_rx to the same as
> skb_orphan_frags to support zerocopy in the localhost test.
> 
> cfg_notification_limit = 1, both method get notifications after 1 calling
> of sendmsg. In this case, the new method has around 17% cpu savings in TCP
> and 23% cpu savings in UDP.
> +---------------------+---------+---------+---------+---------+
> | Test Type / Protocol| TCP v4  | TCP v6  | UDP v4  | UDP v6  |
> +---------------------+---------+---------+---------+---------+
> | ZCopy (MB)          | 7523    | 7706    | 7489    | 7304    |
> +---------------------+---------+---------+---------+---------+
> | New ZCopy (MB)      | 8834    | 8993    | 9053    | 9228    |
> +---------------------+---------+---------+---------+---------+
> | New ZCopy / ZCopy   | 117.42% | 116.70% | 120.88% | 126.34% |
> +---------------------+---------+---------+---------+---------+
> 
> cfg_notification_limit = 32, both get notifications after 32 calling of
> sendmsg, which means more chances to coalesce notifications, and less
> overhead of poll + recvmsg for the original method. In this case, the new
> method has around 7% cpu savings in TCP and slightly better cpu usage in
> UDP. In the env of selftest, notifications of TCP are more likely to be
> out of order than UDP, it's easier to coalesce more notifications in UDP.
> The original method can get one notification with range of 32 in a recvmsg
> most of the time. In TCP, most notifications' range is around 2, so the
> original method needs around 16 recvmsgs to get notified in one round.
> That's the reason for the "New ZCopy / ZCopy" diff in TCP and UDP here.
> +---------------------+---------+---------+---------+---------+
> | Test Type / Protocol| TCP v4  | TCP v6  | UDP v4  | UDP v6  |
> +---------------------+---------+---------+---------+---------+
> | ZCopy (MB)          | 8842    | 8735    | 10072   | 9380    |
> +---------------------+---------+---------+---------+---------+
> | New ZCopy (MB)      | 9366    | 9477    | 10108   | 9385    |
> +---------------------+---------+---------+---------+---------+
> | New ZCopy / ZCopy   | 106.00% | 108.28% | 100.31% | 100.01% |
> +---------------------+---------+---------+---------+---------+
> 
> In conclusion, when notification interval is small or notifications are
> hard to be coalesced, the new mechanism is highly recommended. Otherwise,
> the performance gain from the new mechanism is very limited.
> 
> Signed-off-by: Zijian Zhang <[email protected]>
> Signed-off-by: Xiaochun Lu <[email protected]>

> -static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int 
> domain)
> +static void add_zcopy_info(struct msghdr *msg)
> +{
> +     struct zc_info *zc_info;
> +     struct cmsghdr *cm;
> +
> +     if (!msg->msg_control)
> +             error(1, errno, "NULL user arg");

Don't add precondition checks for code entirely under your control.
This is not a user API.

> +     cm = (struct cmsghdr *)msg->msg_control;
> +     cm->cmsg_len = CMSG_LEN(ZC_INFO_SIZE);
> +     cm->cmsg_level = SOL_SOCKET;
> +     cm->cmsg_type = SCM_ZC_NOTIFICATION;
> +
> +     zc_info = (struct zc_info *)CMSG_DATA(cm);
> +     zc_info->size = ZC_NOTIFICATION_MAX;
> +
> +     added_zcopy_info = true;

Just initialize every time? Is this here to reuse the same msg_control
as long as metadata is returned?

> +}
> +
> +static bool do_sendmsg(int fd, struct msghdr *msg,
> +                    enum notification_type do_zerocopy, int domain)
>  {
>       int ret, len, i, flags;
>       static uint32_t cookie;
> @@ -200,6 +233,12 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool 
> do_zerocopy, int domain)
>                       msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
>                       msg->msg_control = (struct cmsghdr *)ckbuf;
>                       add_zcopy_cookie(msg, ++cookie);
> +             } else if (do_zerocopy == MSG_ZEROCOPY_NOTIFY_SENDMSG &&
> +                        sends_since_notify + 1 >= cfg_notification_limit) {
> +                     memset(&msg->msg_control, 0, sizeof(msg->msg_control));
> +                     msg->msg_controllen = CMSG_SPACE(ZC_INFO_SIZE);
> +                     msg->msg_control = (struct cmsghdr *)zc_ckbuf;
> +                     add_zcopy_info(msg);
>               }
>       }
>  
> @@ -218,7 +257,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool 
> do_zerocopy, int domain)
>               if (do_zerocopy && ret)
>                       expected_completions++;
>       }
> -     if (do_zerocopy && domain == PF_RDS) {
> +     if (msg->msg_control) {
>               msg->msg_control = NULL;
>               msg->msg_controllen = 0;
>       }
> @@ -466,6 +505,44 @@ static void do_recv_completions(int fd, int domain)
>       sends_since_notify = 0;
>  }
>  
> +static void do_recv_completions2(void)

functionname2 is very uninformative.

do_recv_completions_sendmsg or so.

> +{
> +     struct cmsghdr *cm = (struct cmsghdr *)zc_ckbuf;
> +     struct zc_info *zc_info;
> +     __u32 hi, lo, range;
> +     __u8 zerocopy;
> +     int i;
> +
> +     zc_info = (struct zc_info *)CMSG_DATA(cm);
> +     for (i = 0; i < zc_info->size; i++) {
> +             hi = zc_info->arr[i].hi;
> +             lo = zc_info->arr[i].lo;
> +             zerocopy = zc_info->arr[i].zerocopy;
> +             range = hi - lo + 1;
> +
> +             if (cfg_verbose && lo != next_completion)
> +                     fprintf(stderr, "gap: %u..%u does not append to %u\n",
> +                             lo, hi, next_completion);
> +             next_completion = hi + 1;
> +
> +             if (zerocopied == -1) {
> +                     zerocopied = zerocopy;
> +             } else if (zerocopied != zerocopy) {
> +                     fprintf(stderr, "serr: inconsistent\n");
> +                     zerocopied = zerocopy;
> +             }
> +
> +             completions += range;
> +             sends_since_notify -= range;
> +
> +             if (cfg_verbose >= 2)
> +                     fprintf(stderr, "completed: %u (h=%u l=%u)\n",
> +                             range, hi, lo);
> +     }
> +
> +     added_zcopy_info = false;
> +}
> +
>  /* Wait for all remaining completions on the errqueue */
>  static void do_recv_remaining_completions(int fd, int domain)
>  {
> @@ -553,11 +630,16 @@ static void do_tx(int domain, int type, int protocol)
>               else
>                       do_sendmsg(fd, &msg, cfg_zerocopy, domain);
>  
> -             if (cfg_zerocopy && sends_since_notify >= 
> cfg_notification_limit)
> +             if (cfg_zerocopy == MSG_ZEROCOPY_NOTIFY_ERRQUEUE &&
> +                 sends_since_notify >= cfg_notification_limit)
>                       do_recv_completions(fd, domain);
>  
> +             if (cfg_zerocopy == MSG_ZEROCOPY_NOTIFY_SENDMSG &&
> +                 added_zcopy_info)
> +                     do_recv_completions2();
> +
>               while (!do_poll(fd, POLLOUT)) {
> -                     if (cfg_zerocopy)
> +                     if (cfg_zerocopy == MSG_ZEROCOPY_NOTIFY_ERRQUEUE)
>                               do_recv_completions(fd, domain);
>               }
>  
> @@ -715,7 +797,7 @@ static void parse_opts(int argc, char **argv)
>  
>       cfg_payload_len = max_payload_len;
>  
> -     while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
> +     while ((c = getopt(argc, argv, "46c:C:D:i:l:mnp:rs:S:t:vz")) != -1) {
>               switch (c) {
>               case '4':
>                       if (cfg_family != PF_UNSPEC)
> @@ -749,6 +831,9 @@ static void parse_opts(int argc, char **argv)
>               case 'm':
>                       cfg_cork_mixed = true;
>                       break;
> +             case 'n':
> +                     cfg_zerocopy = MSG_ZEROCOPY_NOTIFY_SENDMSG;
> +                     break;

How about -Z to make clear that this is still MSG_ZEROCOPY, just with
a different notification mechanism.

And perhaps add a testcase that exercises both this mechanism and
existing recvmsg MSG_ERRQUEUE. As they should work in parallel and
concurrently in a multithreaded environment.


Reply via email to