On Mon, Jan 07 2019 15:41:53 -0500, Ted Unangst wrote:
> Lauri Tirkkonen wrote:
> > On Sun, Jan 06 2019 14:02:16 -0500, Ted Unangst wrote:
> > > Lauri Tirkkonen wrote:
> > > > Hi, another simple diff converting fgetln usage to getline.
> > > > 
> > > > I also considered converting grep_fgetln to grep_getline, but that would
> > > > mean that for FILE_MMAP we'd have to copy the string. So this diff keeps
> > > > the grep_fgetln interface as is, but avoids using fgetln from libc (and
> > > > adds error handling for FILE_STDIO).
> > > 
> > > this looks good and seems to work. thanks.
> > 
> > actually, it doesn't work :) the added error handling catches that
> > directory fd's are being passed into grep_fgetln, causing an error exit
> > with grep -r (or just grep foo /etc/*). I'm working on a second diff to
> > refactor the file handling a bit.
> 
> oh, interesting. that's sloppy. can you please fix that first, separately?

Sure, here it is. The idea is to use grep_fdopen for everything, making
grep_open just open the file and pass the fd to grep_fdopen, do fstat()
in grep_fdopen and modify mmopen() accordingly (it no longer needs to
open the file or stat it).

grep_tree is modified to not call procfile() on FTS_D (FTS_DP was
already there). In addition grep_fdopen returns NULL and sets errno ==
EISDIR if it encounters a directory; this can happen if a directory
argument is given on command line, or for recursive grep, if the file
being processed is a symlink to a directory, which fts doesn't notice.
procfile() handles the EISDIR return silently, matching current
behavior.

I also removed the useless mode argument from grep_open and grep_fdopen;
we always open files read-only.

diff --git a/usr.bin/grep/file.c b/usr.bin/grep/file.c
index 4b3c689e4ab..2befc4304c1 100644
--- a/usr.bin/grep/file.c
+++ b/usr.bin/grep/file.c
@@ -26,7 +26,10 @@
  * SUCH DAMAGE.
  */
 
+#include <sys/stat.h>
 #include <err.h>
+#include <errno.h>
+#include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <zlib.h>
@@ -90,68 +93,64 @@ gzfgetln(gzFile *f, size_t *len)
 #endif
 
 file_t *
-grep_fdopen(int fd, char *mode)
+grep_fdopen(int fd)
 {
        file_t *f;
+       struct stat sb;
 
        if (fd == STDIN_FILENO)
                snprintf(fname, sizeof fname, "(standard input)");
-       else
+       else if (fname[0] == '\0')
                snprintf(fname, sizeof fname, "(fd %d)", fd);
 
+       if (fstat(fd, &sb) == -1)
+               return NULL;
+       if (S_ISDIR(sb.st_mode)) {
+               errno = EISDIR;
+               return NULL;
+       }
+
        f = grep_malloc(sizeof *f);
 
 #ifndef NOZ
        if (Zflag) {
                f->type = FILE_GZIP;
                f->noseek = lseek(fd, 0L, SEEK_SET) == -1;
-               if ((f->gzf = gzdopen(fd, mode)) != NULL)
+               if ((f->gzf = gzdopen(fd, "r")) != NULL)
                        return f;
-       } else
+       }
 #endif
-       {
-               f->type = FILE_STDIO;
-               f->noseek = isatty(fd);
-               if ((f->f = fdopen(fd, mode)) != NULL)
-                       return f;
+       f->noseek = isatty(fd);
+#ifndef SMALL
+       /* try mmap first; if it fails, try stdio */
+       if (!f->noseek && (f->mmf = mmopen(fd, &sb)) != NULL) {
+               f->type = FILE_MMAP;
+               return f;
        }
+#endif
+       f->type = FILE_STDIO;
+       if ((f->f = fdopen(fd, "r")) != NULL)
+               return f;
 
        free(f);
        return NULL;
 }
 
 file_t *
-grep_open(char *path, char *mode)
+grep_open(char *path)
 {
        file_t *f;
+       int fd;
 
        snprintf(fname, sizeof fname, "%s", path);
 
-       f = grep_malloc(sizeof *f);
-       f->noseek = 0;
-
-#ifndef NOZ
-       if (Zflag) {
-               f->type = FILE_GZIP;
-               if ((f->gzf = gzopen(fname, mode)) != NULL)
-                       return f;
-       } else
-#endif
-       {
-#ifndef SMALL
-               /* try mmap first; if it fails, try stdio */
-               if ((f->mmf = mmopen(fname, mode)) != NULL) {
-                       f->type = FILE_MMAP;
-                       return f;
-               }
-#endif
-               f->type = FILE_STDIO;
-               if ((f->f = fopen(path, mode)) != NULL)
-                       return f;
-       }
+       if ((fd = open(fname, O_RDONLY)) == -1)
+               return NULL;
 
-       free(f);
-       return NULL;
+       f = grep_fdopen(fd);
+       if (f == NULL)
+               close(fd);
+       return f;
 }
 
 int
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
index 913cc97a0f3..e5e48fa163a 100644
--- a/usr.bin/grep/grep.c
+++ b/usr.bin/grep/grep.c
@@ -63,9 +63,7 @@ int    Fflag;         /* -F: interpret pattern as list of 
fixed strings */
 int     Hflag;         /* -H: always print filename header */
 int     Lflag;         /* -L: only show names of files with no matches */
 int     Rflag;         /* -R: recursively search directory trees */
-#ifndef NOZ
 int     Zflag;         /* -Z: decompress input before processing */
-#endif
 int     bflag;         /* -b: show block numbers for each match */
 int     cflag;         /* -c: only show a count of matching lines */
 int     hflag;         /* -h: don't print filename headers */
diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h
index bbf7f8cd8c8..da9adebfc71 100644
--- a/usr.bin/grep/grep.h
+++ b/usr.bin/grep/grep.h
@@ -27,6 +27,7 @@
  */
 
 #include <sys/types.h>
+#include <sys/stat.h>
 
 #include <limits.h>
 #include <regex.h>
@@ -106,7 +107,7 @@ typedef struct mmfile {
        char    *base, *end, *ptr;
 } mmf_t;
 
-mmf_t          *mmopen(char *fn, char *mode);
+mmf_t          *mmopen(int fd, struct stat *sb);
 void            mmclose(mmf_t *mmf);
 char           *mmfgetln(mmf_t *mmf, size_t *l);
 
@@ -114,8 +115,8 @@ char                *mmfgetln(mmf_t *mmf, size_t *l);
 struct file;
 typedef struct file file_t;
 
-file_t         *grep_fdopen(int fd, char *mode);
-file_t         *grep_open(char *path, char *mode);
+file_t         *grep_fdopen(int fd);
+file_t         *grep_open(char *path);
 int             grep_bin_file(file_t *f);
 char           *grep_fgetln(file_t *f, size_t *l);
 void            grep_close(file_t *f);
diff --git a/usr.bin/grep/mmfile.c b/usr.bin/grep/mmfile.c
index d122453429f..ecab0f48b49 100644
--- a/usr.bin/grep/mmfile.c
+++ b/usr.bin/grep/mmfile.c
@@ -41,35 +41,23 @@
 #define MAX_MAP_LEN 1048576
 
 mmf_t *
-mmopen(char *fn, char *mode)
+mmopen(int fd, struct stat *st)
 {
        mmf_t *mmf;
-       struct stat st;
-
-       if (*mode != 'r')
-               return NULL;
 
        mmf = grep_malloc(sizeof *mmf);
-       if ((mmf->fd = open(fn, O_RDONLY)) == -1)
-               goto ouch1;
-       if (fstat(mmf->fd, &st) == -1)
-               goto ouch2;
-       if (st.st_size > SIZE_MAX) /* too big to mmap */
-               goto ouch2;
-       if (!S_ISREG(st.st_mode)) /* only mmap regular files */
-               goto ouch2;
-       mmf->len = (size_t)st.st_size;
+       if (st->st_size > SIZE_MAX) /* too big to mmap */
+               goto ouch;
+       mmf->len = (size_t)st->st_size;
        mmf->base = mmap(NULL, mmf->len, PROT_READ, MAP_PRIVATE, mmf->fd, 
(off_t)0);
        if (mmf->base == MAP_FAILED)
-               goto ouch2;
+               goto ouch;
        mmf->ptr = mmf->base;
        mmf->end = mmf->base + mmf->len;
        madvise(mmf->base, mmf->len, MADV_SEQUENTIAL);
        return mmf;
 
-ouch2:
-       close(mmf->fd);
-ouch1:
+ouch:
        free(mmf);
        return NULL;
 }
diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c
index f7c5407d3e0..ccfae698120 100644
--- a/usr.bin/grep/util.c
+++ b/usr.bin/grep/util.c
@@ -77,6 +77,7 @@ grep_tree(char **argv)
                        if(!sflag)
                                warnc(p->fts_errno, "%s", p->fts_path);
                        break;
+               case FTS_D:
                case FTS_DP:
                        break;
                default:
@@ -101,11 +102,13 @@ procfile(char *fn)
 
        if (fn == NULL) {
                fn = "(standard input)";
-               f = grep_fdopen(STDIN_FILENO, "r");
+               f = grep_fdopen(STDIN_FILENO);
        } else {
-               f = grep_open(fn, "r");
+               f = grep_open(fn);
        }
        if (f == NULL) {
+               if (errno == EISDIR)
+                       return 0;
                file_err = 1;
                if (!sflag)
                        warn("%s", fn);

> (wrt getline, theo raised some concern that getline copies more data than
> fgetln does. we care quite a bit about grep performance, so we need to
> consider that some more.)

ok, thanks. It's true that getline does more copying, but note that
getline is only used for FILE_STDIO, ie. only if a) mmap fails, or b)
grep was built with -DSMALL.

-- 
Lauri Tirkkonen | lotheac @ IRCnet

Reply via email to