Re: simplify the openrsync uploader
On Wed, May 05, 2021 at 11:34:17PM +0200, Sebastian Benoit wrote: > Claudio Jeker(cje...@diehard.n-r-g.com) on 2021.05.05 17:53:20 +0200: > > The rsync uploader (what is the generator in rsync) can be simplified and > > cleaned up a fair bit. > > > > There is some confusion of non-blocking IO on regular files and the idea > > to poll() between openat() and fstat(). This is all not needed and > > therefor a lot of the code handling files can be moved into pre_file. > > This also removes the UPLOAD_READ_LOCAL state since it is no longer > > needed. > > > > As a little extra gift this diff also plugs a mem leak in rsync_uploader. > > The mbuf buffer was not freed and so for every file being checksummed it > > would leak one of those. > > see below about that. > > Other than that its ok. > > > do { > > msz = pread(*fileinfd, mbuf, blk.len, offs); > > - if (msz < 0) { > > + if ((size_t)msz != blk.len && (size_t)msz != blk.rem) { > > ERR("pread"); > > close(*fileinfd); > > *fileinfd = -1; > > + free(mbuf); > > Isnt a free(blk.blks) needed as well, here and in more spots below? Will investigate but I think that will be a different diff then. > > return -1; > > } > > - if ((size_t)msz != blk.len && (size_t)msz != blk.rem) { > > - /* short read, try again */ > > - continue; > > - } > > init_blk([i], , offs, i, mbuf, sess); > > offs += blk.len; > > LOG3( > > @@ -947,6 +935,7 @@ rsync_uploader(struct upload *u, int *fi > > i++; > > } while (i < blk.blksz); > > > > + free(mbuf); > > close(*fileinfd); > > *fileinfd = -1; > > LOG3("%s: mapped %jd B with %zu blocks", > > > -- :wq Claudio
Re: simplify the openrsync uploader
Claudio Jeker(cje...@diehard.n-r-g.com) on 2021.05.05 17:53:20 +0200: > The rsync uploader (what is the generator in rsync) can be simplified and > cleaned up a fair bit. > > There is some confusion of non-blocking IO on regular files and the idea > to poll() between openat() and fstat(). This is all not needed and > therefor a lot of the code handling files can be moved into pre_file. > This also removes the UPLOAD_READ_LOCAL state since it is no longer > needed. > > As a little extra gift this diff also plugs a mem leak in rsync_uploader. > The mbuf buffer was not freed and so for every file being checksummed it > would leak one of those. see below about that. Other than that its ok. > > -- > :wq Claudio > > Index: uploader.c > === > RCS file: /cvs/src/usr.bin/rsync/uploader.c,v > retrieving revision 1.24 > diff -u -p -r1.24 uploader.c > --- uploader.c22 Mar 2021 11:20:04 - 1.24 > +++ uploader.c5 May 2021 15:39:10 - > @@ -33,8 +33,7 @@ > > enum uploadst { > UPLOAD_FIND_NEXT = 0, /* find next to upload to sender */ > - UPLOAD_WRITE_LOCAL, /* wait to write to sender */ > - UPLOAD_READ_LOCAL, /* wait to read from local file */ > + UPLOAD_WRITE, /* wait to write to sender */ > UPLOAD_FINISHED /* nothing more to do in phase */ > }; > > @@ -180,7 +179,8 @@ pre_link(struct upload *p, struct sess * > if (!sess->opts->preserve_links) { > WARNX("%s: ignoring symlink", f->path); > return 0; > - } else if (sess->opts->dry_run) { > + } > + if (sess->opts->dry_run) { > log_link(sess, f); > return 0; > } > @@ -282,7 +282,8 @@ pre_dev(struct upload *p, struct sess *s > if (!sess->opts->devices || getuid() != 0) { > WARNX("skipping non-regular file %s", f->path); > return 0; > - } else if (sess->opts->dry_run) { > + } > + if (sess->opts->dry_run) { > log_file(sess, f); > return 0; > } > @@ -369,7 +370,8 @@ pre_fifo(struct upload *p, struct sess * > if (!sess->opts->specials) { > WARNX("skipping non-regular file %s", f->path); > return 0; > - } else if (sess->opts->dry_run) { > + } > + if (sess->opts->dry_run) { > log_file(sess, f); > return 0; > } > @@ -444,7 +446,8 @@ pre_sock(struct upload *p, struct sess * > if (!sess->opts->specials) { > WARNX("skipping non-regular file %s", f->path); > return 0; > - } else if (sess->opts->dry_run) { > + } > + if (sess->opts->dry_run) { > log_file(sess, f); > return 0; > } > @@ -518,7 +521,8 @@ pre_dir(const struct upload *p, struct s > if (!sess->opts->recursive) { > WARNX("%s: ignoring directory", f->path); > return 0; > - } else if (sess->opts->dry_run) { > + } > + if (sess->opts->dry_run) { > log_dir(sess, f); > return 0; > } > @@ -579,13 +583,14 @@ post_dir(struct sess *sess, const struct > > if (!sess->opts->recursive) > return 1; > - else if (sess->opts->dry_run) > + if (sess->opts->dry_run) > return 1; > > if (fstatat(u->rootfd, f->path, , AT_SYMLINK_NOFOLLOW) == -1) { > ERR("%s: fstatat", f->path); > return 0; > - } else if (!S_ISDIR(st.st_mode)) { > + } > + if (!S_ISDIR(st.st_mode)) { > WARNX("%s: not a directory", f->path); > return 0; > } > @@ -631,12 +636,13 @@ post_dir(struct sess *sess, const struct > > /* > * Try to open the file at the current index. > - * If the file does not exist, returns with success. > + * If the file does not exist, returns with >0. > * Return <0 on failure, 0 on success w/nothing to be done, >0 on > * success and the file needs attention. > */ > static int > -pre_file(const struct upload *p, int *filefd, struct sess *sess) > +pre_file(const struct upload *p, int *filefd, struct stat *st, > +struct sess *sess) > { > const struct flist *f; > > @@ -654,19 +660,54 @@ pre_file(const struct upload *p, int *fi > > /* >* For non dry-run cases, we'll write the acknowledgement later > - * in the rsync_uploader() function because we need to wait for > - * the open() call to complete. > + * in the rsync_uploader() function. >* If the call to openat() fails with ENOENT, there's a > - * fast-path between here and the write function, so we won't do > - * any blocking between now and then. > + * fast-path between here and the write function. >*/ > > *filefd = openat(p->rootfd, f->path, > - O_RDONLY | O_NOFOLLOW | O_NONBLOCK, 0); > - if (*filefd != -1 || errno == ENOENT) > + O_RDONLY |
simplify the openrsync uploader
The rsync uploader (what is the generator in rsync) can be simplified and cleaned up a fair bit. There is some confusion of non-blocking IO on regular files and the idea to poll() between openat() and fstat(). This is all not needed and therefor a lot of the code handling files can be moved into pre_file. This also removes the UPLOAD_READ_LOCAL state since it is no longer needed. As a little extra gift this diff also plugs a mem leak in rsync_uploader. The mbuf buffer was not freed and so for every file being checksummed it would leak one of those. -- :wq Claudio Index: uploader.c === RCS file: /cvs/src/usr.bin/rsync/uploader.c,v retrieving revision 1.24 diff -u -p -r1.24 uploader.c --- uploader.c 22 Mar 2021 11:20:04 - 1.24 +++ uploader.c 5 May 2021 15:39:10 - @@ -33,8 +33,7 @@ enum uploadst { UPLOAD_FIND_NEXT = 0, /* find next to upload to sender */ - UPLOAD_WRITE_LOCAL, /* wait to write to sender */ - UPLOAD_READ_LOCAL, /* wait to read from local file */ + UPLOAD_WRITE, /* wait to write to sender */ UPLOAD_FINISHED /* nothing more to do in phase */ }; @@ -180,7 +179,8 @@ pre_link(struct upload *p, struct sess * if (!sess->opts->preserve_links) { WARNX("%s: ignoring symlink", f->path); return 0; - } else if (sess->opts->dry_run) { + } + if (sess->opts->dry_run) { log_link(sess, f); return 0; } @@ -282,7 +282,8 @@ pre_dev(struct upload *p, struct sess *s if (!sess->opts->devices || getuid() != 0) { WARNX("skipping non-regular file %s", f->path); return 0; - } else if (sess->opts->dry_run) { + } + if (sess->opts->dry_run) { log_file(sess, f); return 0; } @@ -369,7 +370,8 @@ pre_fifo(struct upload *p, struct sess * if (!sess->opts->specials) { WARNX("skipping non-regular file %s", f->path); return 0; - } else if (sess->opts->dry_run) { + } + if (sess->opts->dry_run) { log_file(sess, f); return 0; } @@ -444,7 +446,8 @@ pre_sock(struct upload *p, struct sess * if (!sess->opts->specials) { WARNX("skipping non-regular file %s", f->path); return 0; - } else if (sess->opts->dry_run) { + } + if (sess->opts->dry_run) { log_file(sess, f); return 0; } @@ -518,7 +521,8 @@ pre_dir(const struct upload *p, struct s if (!sess->opts->recursive) { WARNX("%s: ignoring directory", f->path); return 0; - } else if (sess->opts->dry_run) { + } + if (sess->opts->dry_run) { log_dir(sess, f); return 0; } @@ -579,13 +583,14 @@ post_dir(struct sess *sess, const struct if (!sess->opts->recursive) return 1; - else if (sess->opts->dry_run) + if (sess->opts->dry_run) return 1; if (fstatat(u->rootfd, f->path, , AT_SYMLINK_NOFOLLOW) == -1) { ERR("%s: fstatat", f->path); return 0; - } else if (!S_ISDIR(st.st_mode)) { + } + if (!S_ISDIR(st.st_mode)) { WARNX("%s: not a directory", f->path); return 0; } @@ -631,12 +636,13 @@ post_dir(struct sess *sess, const struct /* * Try to open the file at the current index. - * If the file does not exist, returns with success. + * If the file does not exist, returns with >0. * Return <0 on failure, 0 on success w/nothing to be done, >0 on * success and the file needs attention. */ static int -pre_file(const struct upload *p, int *filefd, struct sess *sess) +pre_file(const struct upload *p, int *filefd, struct stat *st, +struct sess *sess) { const struct flist *f; @@ -654,19 +660,54 @@ pre_file(const struct upload *p, int *fi /* * For non dry-run cases, we'll write the acknowledgement later -* in the rsync_uploader() function because we need to wait for -* the open() call to complete. +* in the rsync_uploader() function. * If the call to openat() fails with ENOENT, there's a -* fast-path between here and the write function, so we won't do -* any blocking between now and then. +* fast-path between here and the write function. */ *filefd = openat(p->rootfd, f->path, - O_RDONLY | O_NOFOLLOW | O_NONBLOCK, 0); - if (*filefd != -1 || errno == ENOENT) + O_RDONLY | O_NOFOLLOW, 0); + + if (*filefd == -1 && errno != ENOENT) { + ERR("%s: openat", f->path); + return -1; + } + if (*filefd == -1) return 1; - ERR("%s: openat", f->path); -