On Fri, 4 Jun 2021 17:38:31 +0200 Christian Schoenebeck <qemu_...@crudebyte.com> wrote:
> As with previous performance optimization on Treaddir handling;
> reduce the overall latency, i.e. overall time spent on processing
> a Twalk request by reducing the amount of thread hops between the
> 9p server's main thread and fs worker thread(s).
> 
> In fact this patch even reduces the thread hops for Twalk handling
> to its theoretical minimum of exactly 2 thread hops:
> 
> main thread -> fs worker thread -> main thread
> 
> This is achieved by doing all the required fs driver tasks altogether
> in a single v9fs_co_run_in_worker({ ... }); code block.
> 
> Signed-off-by: Christian Schoenebeck <qemu_...@crudebyte.com>
> ---
>  hw/9pfs/9p.c | 89 +++++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 70 insertions(+), 19 deletions(-)
> 
> diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
> index 7be07f2d68..2815257f42 100644
> --- a/hw/9pfs/9p.c
> +++ b/hw/9pfs/9p.c
> @@ -1705,9 +1705,9 @@ static void coroutine_fn v9fs_walk(void *opaque)
>      int name_idx;
>      V9fsQID *qids = NULL;
>      int i, err = 0;
> -    V9fsPath dpath, path;
> +    V9fsPath dpath, path, *pathes = NULL;
>      uint16_t nwnames;
> -    struct stat stbuf;
> +    struct stat stbuf, fidst, *stbufs = NULL;
>      size_t offset = 7;
>      int32_t fid, newfid;
>      V9fsString *wnames = NULL;
> @@ -1733,6 +1733,8 @@ static void coroutine_fn v9fs_walk(void *opaque)
>      if (nwnames) {
>          wnames = g_new0(V9fsString, nwnames);
>          qids = g_new0(V9fsQID, nwnames);
> +        stbufs = g_new0(struct stat, nwnames);
> +        pathes = g_new0(V9fsPath, nwnames);
>          for (i = 0; i < nwnames; i++) {
>              err = pdu_unmarshal(pdu, offset, "s", &wnames[i]);
>              if (err < 0) {
> @@ -1753,39 +1755,85 @@ static void coroutine_fn v9fs_walk(void *opaque)
>  
>      v9fs_path_init(&dpath);
>      v9fs_path_init(&path);
> +    /*
> +     * Both dpath and path initially point to fidp.
> +     * Needed to handle request with nwnames == 0
> +     */
> +    v9fs_path_copy(&dpath, &fidp->path);
> +    v9fs_path_copy(&path, &fidp->path);
>  
> -    err = v9fs_co_lstat(pdu, &fidp->path, &stbuf);
> +    /*
> +     * To keep latency (i.e. overall execution time for processing this
> +     * Twalk client request) as small as possible, run all the required fs
> +     * driver code altogether inside the following block.
> +     */
> +    v9fs_co_run_in_worker({
> +        if (v9fs_request_cancelled(pdu)) {
> +            err = -EINTR;
> +            break;
> +        }
> +        err = s->ops->lstat(&s->ctx, &dpath, &fidst);
> +        if (err < 0) {
> +            err = -errno;
> +            break;
> +        }
> +        stbuf = fidst;
> +        for (name_idx = 0; name_idx < nwnames; name_idx++) {
> +            if (v9fs_request_cancelled(pdu)) {
> +                err = -EINTR;
> +                break;
> +            }
> +            if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
> +                strcmp("..", wnames[name_idx].data))
> +            {
> +                err = s->ops->name_to_path(&s->ctx, &dpath,
> +                                           wnames[name_idx].data, &path);

It seems you could pass &pathes[name_idx] instead of &path and...

> +                if (err < 0) {
> +                    err = -errno;
> +                    break;
> +                }
> +                if (v9fs_request_cancelled(pdu)) {
> +                    err = -EINTR;
> +                    break;
> +                }
> +                err = s->ops->lstat(&s->ctx, &path, &stbuf);
> +                if (err < 0) {
> +                    err = -errno;
> +                    break;
> +                }
> +                stbufs[name_idx] = stbuf;
> +                v9fs_path_copy(&dpath, &path);
> +                v9fs_path_copy(&pathes[name_idx], &path);

... avoid a copy.

Also, I believe the path -> dpath copy could be avoided as well in the
existing code, but this is a separate cleanup.
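Untested sketch, just to illustrate the idea (and assuming nothing else
in the loop body still needs the local `path` variable):

                /* fill pathes[name_idx] in place, no copy afterwards */
                err = s->ops->name_to_path(&s->ctx, &dpath,
                                           wnames[name_idx].data,
                                           &pathes[name_idx]);
                if (err < 0) {
                    err = -errno;
                    break;
                }
                if (v9fs_request_cancelled(pdu)) {
                    err = -EINTR;
                    break;
                }
                err = s->ops->lstat(&s->ctx, &pathes[name_idx], &stbuf);
                if (err < 0) {
                    err = -errno;
                    break;
                }
                stbufs[name_idx] = stbuf;
                /* advance dpath to the component we just resolved */
                v9fs_path_copy(&dpath, &pathes[name_idx]);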
> +            }
> +        }
> +    });
> +    /*
> +     * Handle all the rest of this Twalk request on main thread ...
> +     */
>      if (err < 0) {
>          goto out;
>      }
> -    err = stat_to_qid(pdu, &stbuf, &qid);
> +
> +    err = stat_to_qid(pdu, &fidst, &qid);
>      if (err < 0) {
>          goto out;
>      }
> +    stbuf = fidst;
>  
> -    /*
> -     * Both dpath and path initially poin to fidp.
> -     * Needed to handle request with nwnames == 0
> -     */
> +    /* reset dpath and path */
>      v9fs_path_copy(&dpath, &fidp->path);
>      v9fs_path_copy(&path, &fidp->path);
> +
>      for (name_idx = 0; name_idx < nwnames; name_idx++) {
>          if (!same_stat_id(&pdu->s->root_st, &stbuf) ||
> -            strcmp("..", wnames[name_idx].data)) {
> -            err = v9fs_co_name_to_path(pdu, &dpath, wnames[name_idx].data,
> -                                       &path);
> -            if (err < 0) {
> -                goto out;
> -            }
> -
> -            err = v9fs_co_lstat(pdu, &path, &stbuf);
> -            if (err < 0) {
> -                goto out;
> -            }
> +            strcmp("..", wnames[name_idx].data))
> +        {
> +            stbuf = stbufs[name_idx];
>              err = stat_to_qid(pdu, &stbuf, &qid);
>              if (err < 0) {
>                  goto out;
>              }
> +            v9fs_path_copy(&path, &pathes[name_idx]);
>              v9fs_path_copy(&dpath, &path);
>          }
>          memcpy(&qids[name_idx], &qid, sizeof(qid));
> @@ -1821,9 +1869,12 @@ out_nofid:
>      if (nwnames && nwnames <= P9_MAXWELEM) {
>          for (name_idx = 0; name_idx < nwnames; name_idx++) {
>              v9fs_string_free(&wnames[name_idx]);
> +            v9fs_path_free(&pathes[name_idx]);
>          }
>          g_free(wnames);
>          g_free(qids);
> +        g_free(stbufs);
> +        g_free(pathes);

All of these guys should be converted to g_autofree. Separate cleanup
again.

v9fs_walk() was already a bit hairy and the diffstat definitely doesn't
improve things... this being said, the change makes sense and I haven't
spotted anything wrong, so:

Reviewed-by: Greg Kurz <gr...@kaod.org>

Improvements could be to:
- track the previous path with a V9fsPath * instead of copying
- have a separate path for the nwnames == 0 case

>      }
>  }
> 
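For illustration only (untested sketch, not part of the patch), the
g_autofree conversion suggested above would boil down to declaring:

    g_autofree V9fsString *wnames = NULL;
    g_autofree V9fsQID *qids = NULL;
    g_autofree struct stat *stbufs = NULL;
    g_autofree V9fsPath *pathes = NULL;

and dropping the four explicit g_free() calls. Note that g_autofree
only g_free()s the arrays themselves at scope exit, so the per-element
v9fs_string_free() / v9fs_path_free() loop in the cleanup path would
still be needed.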