On Fri, Dec 07, 2018 at 08:41:21AM +0100, Martijn van Duren wrote:
> I ran into a few minor nuisances with sed's -i mode, which are mostly
> compatible with gnu sed, but I reckon we should address.
>
> The problem is sed works by writing the output to a second file in the
> same directory as the original and after completion renaming the file
> to the original. This has two disadvantages:
> 1) The inode number changes, resulting in loss of carefully crafted
> hardlinks.
> 2) We require to have write permission in the same directory as the
> original file, even if we don't want to have a backup file.
>
> Diff below tries to solve this by doing the following.
> Copy the file to the backup location (/tmp/sedXXXXXXXXXX if no
> extension) and use the backup as the infile and the original as the
> outfile.
>
> Furthermore I changed the lstat to fstat, so we can edit symlinks (gsed
> supports symlinks by replacing the symlink by a new real file, which is
> also fun), and I extended the warning messages in process to show the
> backup file if we crash during operation, so people will always know
> where to recover the file in case of disaster.
>
> Because process also error(FATAL, ...)s during process and we always
> have a backup file I don't think the warning in sed.1 is worth keeping.
>
> The only downside to this new approach (that I can think of) is that we
> now temporarily have a file that is in an inconsistent state, but that's
> not much different to writing a file with any other editor.
>
> $ echo test > /usr/obj/test && dd if=/dev/zero of=/usr/obj/bloat
> /usr/obj: write failed, file system is full
> dd: /usr/obj/bloat: No space left on device
> 614113+0 records in
> 614112+0 records out
> 314425344 bytes transferred in 2.206 secs (142470196 bytes/sec)
> $ ./obj/sed -i -f /tmp/test.sed /usr/obj/test
>
>
> /usr/obj: write failed, file system is full
> sed: /usr/obj/test: No space left on device (backup at /tmp/sedgRPSLImG9N)
> $ cat /tmp/test.sed
> s/test/aaa<repeat to infinity>/
> $ cat /tmp/sedgRPSLImG9N
> test
> $ ls -i /tmp/test
> 104 /tmp/test
> $ sed -i s/test/foo/ /tmp/test
> $ ls -i /tmp/test
> 104 /tmp/test
> $ doas touch /etc/test
> $ doas chown martijn /etc/test
> $ echo test > /etc/test
> $ sed -i s/test/foo/ /etc/test
> $ cat /etc/test
> foo
>
> The diff does change quite a few mechanics, so some scrutiny is welcome.
>
> martijn@
>
> Index: main.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/sed/main.c,v
> retrieving revision 1.39
> diff -u -p -r1.39 main.c
> --- main.c 6 Dec 2018 20:16:04 -0000 1.39
> +++ main.c 7 Dec 2018 07:31:54 -0000
> @@ -35,6 +35,7 @@
>
> #include <sys/types.h>
> #include <sys/ioctl.h>
> +#include <sys/param.h>
> #include <sys/stat.h>
>
> #include <ctype.h>
> @@ -94,13 +95,13 @@ static int rval; /* Exit status */
> */
> const char *fname; /* File name. */
> const char *outfname; /* Output file name */
> -static char oldfname[PATH_MAX]; /* Old file name (for in-place editing)
> */
> -static char tmpfname[PATH_MAX]; /* Temporary file name (for in-place
> editing) */
> +char oldfname[PATH_MAX]; /* Old file name (for in-place editing) */
> char *inplace; /* Inplace edit file extension */
> u_long linenum;
>
> static void add_compunit(enum e_cut, char *);
> static void add_file(char *);
> +int copy(FILE *, FILE *);
> static int next_files_have_lines(void);
>
> int termwidth;
> @@ -310,26 +311,46 @@ again:
> return (NULL);
> }
>
> +int
> +copy(FILE *src, FILE *dst)
> +{
> + unsigned char buf[MAXBSIZE];
> + size_t r, w, tw;
> +
> + while(1) {
> + if ((r = fread(buf, sizeof(*buf), sizeof(buf), src)) == 0) {
> + if (feof(src)) {
> + if (fflush(dst) == EOF)
> + return 0;
> + return 1;
> + }
> + if (errno != EINTR)
> + return 0;
> + continue;
> + }
> + tw = 0;
> + while(tw < r) {
> + w = fwrite(buf + tw, sizeof(*buf), r - tw, dst);
> + if (w == 0) {
> + if (errno != EINTR)
> + return 0;
> + continue;
> + }
> + tw += w;
> + }
> + }
> +}
> +
> void
> finish_file(void)
> {
> if (infile != NULL) {
> fclose(infile);
> - if (*oldfname != '\0') {
> - if (rename(fname, oldfname) != 0) {
> - warning("rename()");
> - unlink(tmpfname);
> - exit(1);
> - }
> + if (inplace != NULL) {
> + if (*oldfname == '\0')
> + unlink(oldfname);
This looks a bit odd.
> *oldfname = '\0';
> }
> - if (*tmpfname != '\0') {
> - if (outfile != NULL && outfile != stdout)
> - fclose(outfile);
> - outfile = NULL;
> - rename(tmpfname, fname);
> - *tmpfname = '\0';
> - }
> outfname = NULL;
> }
> }
> @@ -341,7 +362,7 @@ finish_file(void)
> int
> mf_fgets(SPACE *sp, enum e_spflag spflag)
> {
> - struct stat sb;
> + struct stat sb, isb;
> size_t len;
> char *p;
> int c, fd;
> @@ -369,10 +390,18 @@ mf_fgets(SPACE *sp, enum e_spflag spflag
> outfname = "stdout";
> break;
> }
> + infile = fopen(fname, inplace == NULL ? "r" : "r+");
> + if (infile == NULL) {
> + warning("%s", strerror(errno));
> + rval = 1;
> + continue;
> + }
> if (inplace != NULL) {
> - if (lstat(fname, &sb) != 0)
> + /* inplace reads from copy */
> + outfile = infile;
> + if (fstat(fileno(outfile), &sb) != 0)
> error(FATAL, "%s: %s", fname,
> - strerror(errno ? errno : EIO));
> + strerror(errno));
> if (!S_ISREG(sb.st_mode))
> error(FATAL, "%s: %s %s", fname,
> "in-place editing only",
> @@ -382,32 +411,48 @@ mf_fgets(SPACE *sp, enum e_spflag spflag
> sizeof(oldfname));
> len = strlcat(oldfname, inplace,
> sizeof(oldfname));
> - if (len > sizeof(oldfname))
> - error(FATAL, "%s: name too long",
> fname);
> - }
> - len = snprintf(tmpfname, sizeof(tmpfname),
> "%s/sedXXXXXXXXXX",
> - dirname(fname));
> - if (len >= sizeof(tmpfname))
> - error(FATAL, "%s: name too long", fname);
> - if ((fd = mkstemp(tmpfname)) == -1)
> - error(FATAL, "%s: %s", fname, strerror(errno));
> - if ((outfile = fdopen(fd, "w")) == NULL) {
> - unlink(tmpfname);
> - error(FATAL, "%s", fname);
> + if (len >= sizeof(oldfname))
> + error(FATAL, "backup %s: name too long",
> + fname);
> + (void)unlink(oldfname);
> + if ((infile = fopen(oldfname, "w+")) == NULL)
> + error(FATAL, "can't create backup "
> + "file: %s", oldfname);
> + if (fstat(fileno(infile), &isb) != 0)
> + error(FATAL, "%s: %s", oldfname,
> + strerror(errno));
> + if (!S_ISREG(isb.st_mode))
> + error(FATAL, "backup file %s not a "
> + "regular file and can't be "
> + "replaced", oldfname);
> + fchown(fileno(infile), sb.st_uid, sb.st_gid);
> + fchmod(fileno(infile), sb.st_mode & ALLPERMS);
> + } else {
> + (void)strlcpy(oldfname, "/tmp/sedXXXXXXXXXX",
> + sizeof(oldfname));
> + if ((fd = mkstemp(oldfname)) == -1)
> + error(FATAL, "%s: %s", fname,
> + strerror(errno));
> + if ((infile = fdopen(fd, "a+")) == NULL) {
> + unlink(oldfname);
> + error(FATAL, "%s", fname);
> + }
> }
> - fchown(fileno(outfile), sb.st_uid, sb.st_gid);
> - fchmod(fileno(outfile), sb.st_mode & ALLPERMS);
> - outfname = tmpfname;
> + outfname = fname;
> linenum = 0;
> resetstate();
> + if (!copy(outfile, infile)) {
> + unlink(oldfname);
> + error(FATAL, "%s: failed to create backup copy "
> + "to %s: %s", fname, oldfname,
> + strerror(errno));
> + }
> + ftruncate(fileno(outfile), 0);
> + rewind(outfile);
> + rewind(infile);
> } else {
> outfile = stdout;
> outfname = "stdout";
> - }
> - if ((infile = fopen(fname, "r")) == NULL) {
> - warning("%s", strerror(errno));
> - rval = 1;
> - continue;
> }
> }
>
> Index: process.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/sed/process.c,v
> retrieving revision 1.34
> diff -u -p -r1.34 process.c
> --- process.c 14 Nov 2018 10:59:33 -0000 1.34
> +++ process.c 7 Dec 2018 07:31:54 -0000
> @@ -75,6 +75,7 @@ static int sdone; /* If any substitutes
> /* Iov structure for 'w' commands. */
> static regex_t *defpreg;
> size_t maxnsub;
> +extern char oldfname[PATH_MAX];
> regmatch_t *match;
>
> #define OUT() do {\
> @@ -473,7 +474,11 @@ flush_appends(void)
> break;
> }
> if (ferror(outfile))
> - error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
> + error(FATAL, "%s: %s%s%s%s", outfname,
> + strerror(errno ? errno : EIO),
> + *oldfname == '\0' ? "" : " (backup at ",
> + *oldfname == '\0' ? "" : oldfname,
> + *oldfname == '\0' ? "" : ")");
> appendx = sdone = 0;
> }
>
> @@ -513,7 +518,11 @@ lputs(char *s)
> (void)fputc('$', outfile);
> (void)fputc('\n', outfile);
> if (ferror(outfile))
> - error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
> + error(FATAL, "%s: %s%s%s%s", outfname,
> + strerror(errno ? errno : EIO),
> + *oldfname == '\0' ? "" : " (backup at ",
> + *oldfname == '\0' ? "" : oldfname,
> + *oldfname == '\0' ? "" : ")");
> }
>
> static inline int
> Index: sed.1
> ===================================================================
> RCS file: /cvs/src/usr.bin/sed/sed.1,v
> retrieving revision 1.57
> diff -u -p -r1.57 sed.1
> --- sed.1 14 Nov 2018 10:59:33 -0000 1.57
> +++ sed.1 7 Dec 2018 07:31:54 -0000
> @@ -102,10 +102,6 @@ Edit files in place, saving backups with
> If a zero length
> .Ar extension
> is given, no backup will be saved.
> -It is not recommended to give a zero length
> -.Ar extension
> -when in place editing files, as it risks corruption or partial content
> -in situations where disk space is exhausted, etc.
> In
> .Fl i
> mode, the hold space, line numbers, and ranges are reset between files.
>