Re: simplify the openrsync uploader

2021-05-06 Thread Claudio Jeker
On Wed, May 05, 2021 at 11:34:17PM +0200, Sebastian Benoit wrote:
> Claudio Jeker(cje...@diehard.n-r-g.com) on 2021.05.05 17:53:20 +0200:
> > The rsync uploader (what is the generator in rsync) can be simplified and
> > cleaned up a fair bit.
> > 
> > There is some confusion of non-blocking IO on regular files and the idea
> > to poll() between openat() and fstat(). This is all not needed and
> > therefor a lot of the code handling files can be moved into pre_file.
> > This also removes the UPLOAD_READ_LOCAL state since it is no longer
> > needed.
> > 
> > As a little extra gift this diff also plugs a mem leak in rsync_uploader.
> > The mbuf buffer was not freed and so for every file being checksummed it
> > would leak one of those.
> 
> see below about that.
> 
> Other than that its ok.
> 
> > do {
> > msz = pread(*fileinfd, mbuf, blk.len, offs);
> > -   if (msz < 0) {
> > +   if ((size_t)msz != blk.len && (size_t)msz != blk.rem) {
> > ERR("pread");
> > close(*fileinfd);
> > *fileinfd = -1;
> > +   free(mbuf);
> 
> Isnt a free(blk.blks) needed as well, here and in more spots below?

Will investigate but I think that will be a different diff then.
 
> > return -1;
> > }
> > -   if ((size_t)msz != blk.len && (size_t)msz != blk.rem) {
> > -   /* short read, try again */
> > -   continue;
> > -   }
> > init_blk([i], , offs, i, mbuf, sess);
> > offs += blk.len;
> > LOG3(
> > @@ -947,6 +935,7 @@ rsync_uploader(struct upload *u, int *fi
> > i++;
> > } while (i < blk.blksz);
> >  
> > +   free(mbuf);
> > close(*fileinfd);
> > *fileinfd = -1;
> > LOG3("%s: mapped %jd B with %zu blocks",
> > 
> 

-- 
:wq Claudio



Re: simplify the openrsync uploader

2021-05-05 Thread Sebastian Benoit
Claudio Jeker(cje...@diehard.n-r-g.com) on 2021.05.05 17:53:20 +0200:
> The rsync uploader (what is the generator in rsync) can be simplified and
> cleaned up a fair bit.
> 
> There is some confusion of non-blocking IO on regular files and the idea
> to poll() between openat() and fstat(). This is all not needed and
> therefor a lot of the code handling files can be moved into pre_file.
> This also removes the UPLOAD_READ_LOCAL state since it is no longer
> needed.
> 
> As a little extra gift this diff also plugs a mem leak in rsync_uploader.
> The mbuf buffer was not freed and so for every file being checksummed it
> would leak one of those.

see below about that.

Other than that its ok.

> 
> -- 
> :wq Claudio
> 
> Index: uploader.c
> ===
> RCS file: /cvs/src/usr.bin/rsync/uploader.c,v
> retrieving revision 1.24
> diff -u -p -r1.24 uploader.c
> --- uploader.c22 Mar 2021 11:20:04 -  1.24
> +++ uploader.c5 May 2021 15:39:10 -
> @@ -33,8 +33,7 @@
>  
>  enum uploadst {
>   UPLOAD_FIND_NEXT = 0, /* find next to upload to sender */
> - UPLOAD_WRITE_LOCAL, /* wait to write to sender */
> - UPLOAD_READ_LOCAL, /* wait to read from local file */
> + UPLOAD_WRITE, /* wait to write to sender */
>   UPLOAD_FINISHED /* nothing more to do in phase */
>  };
>  
> @@ -180,7 +179,8 @@ pre_link(struct upload *p, struct sess *
>   if (!sess->opts->preserve_links) {
>   WARNX("%s: ignoring symlink", f->path);
>   return 0;
> - } else if (sess->opts->dry_run) {
> + }
> + if (sess->opts->dry_run) {
>   log_link(sess, f);
>   return 0;
>   }
> @@ -282,7 +282,8 @@ pre_dev(struct upload *p, struct sess *s
>   if (!sess->opts->devices || getuid() != 0) {
>   WARNX("skipping non-regular file %s", f->path);
>   return 0;
> - } else if (sess->opts->dry_run) {
> + }
> + if (sess->opts->dry_run) {
>   log_file(sess, f);
>   return 0;
>   }
> @@ -369,7 +370,8 @@ pre_fifo(struct upload *p, struct sess *
>   if (!sess->opts->specials) {
>   WARNX("skipping non-regular file %s", f->path);
>   return 0;
> - } else if (sess->opts->dry_run) {
> + }
> + if (sess->opts->dry_run) {
>   log_file(sess, f);
>   return 0;
>   }
> @@ -444,7 +446,8 @@ pre_sock(struct upload *p, struct sess *
>   if (!sess->opts->specials) {
>   WARNX("skipping non-regular file %s", f->path);
>   return 0;
> - } else if (sess->opts->dry_run) {
> + }
> + if (sess->opts->dry_run) {
>   log_file(sess, f);
>   return 0;
>   }
> @@ -518,7 +521,8 @@ pre_dir(const struct upload *p, struct s
>   if (!sess->opts->recursive) {
>   WARNX("%s: ignoring directory", f->path);
>   return 0;
> - } else if (sess->opts->dry_run) {
> + }
> + if (sess->opts->dry_run) {
>   log_dir(sess, f);
>   return 0;
>   }
> @@ -579,13 +583,14 @@ post_dir(struct sess *sess, const struct
>  
>   if (!sess->opts->recursive)
>   return 1;
> - else if (sess->opts->dry_run)
> + if (sess->opts->dry_run)
>   return 1;
>  
>   if (fstatat(u->rootfd, f->path, , AT_SYMLINK_NOFOLLOW) == -1) {
>   ERR("%s: fstatat", f->path);
>   return 0;
> - } else if (!S_ISDIR(st.st_mode)) {
> + }
> + if (!S_ISDIR(st.st_mode)) {
>   WARNX("%s: not a directory", f->path);
>   return 0;
>   }
> @@ -631,12 +636,13 @@ post_dir(struct sess *sess, const struct
>  
>  /*
>   * Try to open the file at the current index.
> - * If the file does not exist, returns with success.
> + * If the file does not exist, returns with >0.
>   * Return <0 on failure, 0 on success w/nothing to be done, >0 on
>   * success and the file needs attention.
>   */
>  static int
> -pre_file(const struct upload *p, int *filefd, struct sess *sess)
> +pre_file(const struct upload *p, int *filefd, struct stat *st,
> +struct sess *sess)
>  {
>   const struct flist *f;
>  
> @@ -654,19 +660,54 @@ pre_file(const struct upload *p, int *fi
>  
>   /*
>* For non dry-run cases, we'll write the acknowledgement later
> -  * in the rsync_uploader() function because we need to wait for
> -  * the open() call to complete.
> +  * in the rsync_uploader() function.
>* If the call to openat() fails with ENOENT, there's a
> -  * fast-path between here and the write function, so we won't do
> -  * any blocking between now and then.
> +  * fast-path between here and the write function.
>*/
>  
>   *filefd = openat(p->rootfd, f->path,
> - O_RDONLY | O_NOFOLLOW | O_NONBLOCK, 0);
> - if (*filefd != -1 || errno == ENOENT)
> + O_RDONLY | 

simplify the openrsync uploader

2021-05-05 Thread Claudio Jeker
The rsync uploader (what is the generator in rsync) can be simplified and
cleaned up a fair bit.

There is some confusion of non-blocking IO on regular files and the idea
to poll() between openat() and fstat(). This is all not needed and
therefor a lot of the code handling files can be moved into pre_file.
This also removes the UPLOAD_READ_LOCAL state since it is no longer
needed.

As a little extra gift this diff also plugs a mem leak in rsync_uploader.
The mbuf buffer was not freed and so for every file being checksummed it
would leak one of those.

-- 
:wq Claudio

Index: uploader.c
===
RCS file: /cvs/src/usr.bin/rsync/uploader.c,v
retrieving revision 1.24
diff -u -p -r1.24 uploader.c
--- uploader.c  22 Mar 2021 11:20:04 -  1.24
+++ uploader.c  5 May 2021 15:39:10 -
@@ -33,8 +33,7 @@
 
 enum   uploadst {
UPLOAD_FIND_NEXT = 0, /* find next to upload to sender */
-   UPLOAD_WRITE_LOCAL, /* wait to write to sender */
-   UPLOAD_READ_LOCAL, /* wait to read from local file */
+   UPLOAD_WRITE, /* wait to write to sender */
UPLOAD_FINISHED /* nothing more to do in phase */
 };
 
@@ -180,7 +179,8 @@ pre_link(struct upload *p, struct sess *
if (!sess->opts->preserve_links) {
WARNX("%s: ignoring symlink", f->path);
return 0;
-   } else if (sess->opts->dry_run) {
+   }
+   if (sess->opts->dry_run) {
log_link(sess, f);
return 0;
}
@@ -282,7 +282,8 @@ pre_dev(struct upload *p, struct sess *s
if (!sess->opts->devices || getuid() != 0) {
WARNX("skipping non-regular file %s", f->path);
return 0;
-   } else if (sess->opts->dry_run) {
+   }
+   if (sess->opts->dry_run) {
log_file(sess, f);
return 0;
}
@@ -369,7 +370,8 @@ pre_fifo(struct upload *p, struct sess *
if (!sess->opts->specials) {
WARNX("skipping non-regular file %s", f->path);
return 0;
-   } else if (sess->opts->dry_run) {
+   }
+   if (sess->opts->dry_run) {
log_file(sess, f);
return 0;
}
@@ -444,7 +446,8 @@ pre_sock(struct upload *p, struct sess *
if (!sess->opts->specials) {
WARNX("skipping non-regular file %s", f->path);
return 0;
-   } else if (sess->opts->dry_run) {
+   }
+   if (sess->opts->dry_run) {
log_file(sess, f);
return 0;
}
@@ -518,7 +521,8 @@ pre_dir(const struct upload *p, struct s
if (!sess->opts->recursive) {
WARNX("%s: ignoring directory", f->path);
return 0;
-   } else if (sess->opts->dry_run) {
+   }
+   if (sess->opts->dry_run) {
log_dir(sess, f);
return 0;
}
@@ -579,13 +583,14 @@ post_dir(struct sess *sess, const struct
 
if (!sess->opts->recursive)
return 1;
-   else if (sess->opts->dry_run)
+   if (sess->opts->dry_run)
return 1;
 
if (fstatat(u->rootfd, f->path, , AT_SYMLINK_NOFOLLOW) == -1) {
ERR("%s: fstatat", f->path);
return 0;
-   } else if (!S_ISDIR(st.st_mode)) {
+   }
+   if (!S_ISDIR(st.st_mode)) {
WARNX("%s: not a directory", f->path);
return 0;
}
@@ -631,12 +636,13 @@ post_dir(struct sess *sess, const struct
 
 /*
  * Try to open the file at the current index.
- * If the file does not exist, returns with success.
+ * If the file does not exist, returns with >0.
  * Return <0 on failure, 0 on success w/nothing to be done, >0 on
  * success and the file needs attention.
  */
 static int
-pre_file(const struct upload *p, int *filefd, struct sess *sess)
+pre_file(const struct upload *p, int *filefd, struct stat *st,
+struct sess *sess)
 {
const struct flist *f;
 
@@ -654,19 +660,54 @@ pre_file(const struct upload *p, int *fi
 
/*
 * For non dry-run cases, we'll write the acknowledgement later
-* in the rsync_uploader() function because we need to wait for
-* the open() call to complete.
+* in the rsync_uploader() function.
 * If the call to openat() fails with ENOENT, there's a
-* fast-path between here and the write function, so we won't do
-* any blocking between now and then.
+* fast-path between here and the write function.
 */
 
*filefd = openat(p->rootfd, f->path,
-   O_RDONLY | O_NOFOLLOW | O_NONBLOCK, 0);
-   if (*filefd != -1 || errno == ENOENT)
+   O_RDONLY | O_NOFOLLOW, 0);
+
+   if (*filefd == -1 && errno != ENOENT) {
+   ERR("%s: openat", f->path);
+   return -1;
+   }
+   if (*filefd == -1)
return 1;
-   ERR("%s: openat", f->path);
-