Module Name: src Committed By: andvar Date: Thu May 23 22:07:16 UTC 2024
Modified Files: src/external/bsd/tre/dist/src: agrep.c Log Message: agrep(1): rewrite the binary check introduced with recursive search in rev 1.3 to more closely match the grep(1) implementation. Instead of opening and scanning the full file every time a match is found, check for the \0 symbol when the file buffer is filled for the first time. The patch fixes stdin, adds the binary check for it as well, and works more efficiently (at the cost of not scanning the full file). Also, the original implementation forgot to add a break when a binary file is detected, causing duplicate binary matches in the output. Because the full file was scanned on each match instead of once per file, searches could be considerably slow. Fixes PR bin/53513. Needs pullups to netbsd-9,-10. Thanks to mlelstv and dh for their help in reviewing and finalizing the patch. To generate a diff of this commit: cvs rdiff -u -r1.4 -r1.5 src/external/bsd/tre/dist/src/agrep.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/external/bsd/tre/dist/src/agrep.c diff -u src/external/bsd/tre/dist/src/agrep.c:1.4 src/external/bsd/tre/dist/src/agrep.c:1.5 --- src/external/bsd/tre/dist/src/agrep.c:1.4 Thu Aug 20 15:54:11 2020 +++ src/external/bsd/tre/dist/src/agrep.c Thu May 23 22:07:16 2024 @@ -179,6 +179,7 @@ static int next_delim_len; /* Length of static int delim_after = 1;/* If true, print the delimiter after the record. */ static int at_eof; static int have_matches; /* If true, matches have been found. */ +static int is_binary; /* -1 unknown, 0 ascii, 1 binary */ static int invert_match; /* Show only non-matching records. */ static int print_filename; /* Output filename. */ @@ -200,6 +201,12 @@ static regaparams_t match_params; environment variable GREP_COLOR overrides this default value. */ static const char *highlight = "01;31"; +static int +isbinaryfile(void) +{ + return buf != NULL && memchr(buf, '\0', data_len) != NULL; +} + /* Sets `record' to the next complete record from file `fd', and `record_len' to the length of the record. Returns 1 when there are no more records, 0 otherwise. */ @@ -262,6 +269,9 @@ tre_agrep_get_next_record(int fd, const } data_len += r; next_record = buf; + + if (is_binary < 0) + is_binary = isbinaryfile(); } /* Find the next record delimiter. 
*/ @@ -316,41 +326,6 @@ tre_agrep_get_next_record(int fd, const #include <dirent.h> -static int -isbinaryfile(const char *filename) -{ - struct stat st; - size_t size; - size_t i; - char *mapped; - FILE *fp; - int isbin; - - if ((fp = fopen(filename, "r")) == NULL) { - return 1; - } - fstat(fileno(fp), &st); - isbin = 0; - if ((st.st_mode & S_IFMT) != S_IFREG) { - isbin = 1; - } else { - size = (size_t)st.st_size; - mapped = mmap(NULL, size, PROT_READ, MAP_SHARED, fileno(fp), 0); - if (mapped == MAP_FAILED) { - fclose(fp); - return 1; - } - for (i = 0 ; !isbin && i < size ; i++) { - if (mapped[i] == 0x0) { - isbin = 1; - } - } - munmap(mapped, size); - } - fclose(fp); - return isbin; -} - static int tre_agrep_handle_file(const char */*filename*/); static int @@ -398,6 +373,8 @@ tre_agrep_handle_file(const char *filena int count = 0; int recnum = 0; + is_binary = -1; + /* Allocate the initial buffer. */ if (buf == NULL) { @@ -488,10 +465,12 @@ tre_agrep_handle_file(const char *filena printf("%s\n", filename); break; } - else if (!count_matches && isbinaryfile(filename)) + else if (!count_matches && is_binary > 0) { if (print_filename) - printf("Binary file %s matches\n", filename); + printf("%s:", filename); + printf("Binary file matches\n"); + break; } else if (!count_matches) {