Hello,

locate(1) doesn't handle paths longer than MAXPATHLEN well: they are
silently split into MAXPATHLEN-long strings, and each string is included
in the database. Here is an example:

 $ cat loc_test.sh
# Reproduce the locate(1) long-path problem: build a deeply nested tree,
# index it with locate.updatedb, then query the resulting database.
testdir='/tmp/locate_maxpathlen'
db="$testdir/db"

# Nest the same 64-character directory name repeatedly so that the full
# paths of the files created below end up close to the path-length limit.
d='64_characters_long_directory_name_______________________________'
long_path="$testdir/$d/$d/$d/$d/$d/$d/$d/$d/$d/$d/$d/$d/$d/$d/$d"

mkdir -p "$long_path" || exit 1

# Bail out if the directory cannot be entered; otherwise the files below
# would be created in whatever directory the script was started from.
cd "$long_path" || exit 1
touch short_filename
touch long_filename_which_gets_truncated

# Build a private database so the system-wide one is left untouched.
/usr/libexec/locate.updatedb --fcodes="$db" \
                             --searchpaths="$testdir /usr/src" \
                            || exit 1

# Show only the entries that come from the test tree.
locate -d "$db" filename truncated | grep  -v "^/usr/src"

 $ sh loc_test.sh
/tmp/locate_maxpathlen/64_char_[... SNIP ...]_/long_filename_which_gets_
/tmp/locate_maxpathlen/64_char_[... SNIP ...]_/short_filename
truncated

We have just created the path "truncated" in the locate database.


The diff below includes the following changes:
        - locate(1) now silently ignores paths longer than MAXPATHLEN
        - MAXPATHLEN from <sys/param.h> is replaced by PATH_MAX from
          <limits.h>
        - add a missing prototype for sane_count()

Another solution would be to modify mklocatedb.sh or updatedb.sh to
filter out those overly long paths before they are fed to locate.code,
but I can't see a way to achieve this that doesn't require hardcoding
PATH_MAX.

I ran /etc/weekly with the new binaries and didn't notice any
unexpected changes in the locate database.


Nicolas Bedos


Index: src/usr.bin/locate//bigram/locate.bigram.c
===================================================================
RCS file: /cvs/src/usr.bin/locate/bigram/locate.bigram.c,v
retrieving revision 1.12
diff -u -p -u -r1.12 locate.bigram.c
--- src/usr.bin/locate//bigram/locate.bigram.c  27 Oct 2009 23:59:39 -0000      1.12
+++ src/usr.bin/locate//bigram/locate.bigram.c  23 Nov 2014 16:49:47 -0000
@@ -43,13 +43,13 @@
  * Use 'code' to encode a file using this output.
  */
 
+#include <limits.h>                    /* for PATH_MAX */
 #include <stdio.h>
 #include <stdlib.h>
-#include <sys/param.h>                 /* for MAXPATHLEN */
 #include "locate.h"
 
-u_char buf1[MAXPATHLEN] = " ";
-u_char buf2[MAXPATHLEN];
+u_char buf1[PATH_MAX] = " ";
+u_char buf2[PATH_MAX];
 u_int bigram[UCHAR_MAX + 1][UCHAR_MAX + 1];
 
 int
Index: src/usr.bin/locate//code/locate.code.c
===================================================================
RCS file: /cvs/src/usr.bin/locate/code/locate.code.c,v
retrieving revision 1.17
diff -u -p -u -r1.17 locate.code.c
--- src/usr.bin/locate//code/locate.code.c      17 Nov 2013 20:19:36 -0000      1.17
+++ src/usr.bin/locate//code/locate.code.c      23 Nov 2014 16:49:47 -0000
@@ -78,10 +78,10 @@
  *                     Wolfram Schneider, Berlin September 1996
  */
 
-#include <sys/param.h>
 
 #include <err.h>
 #include <errno.h>
+#include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -91,8 +91,8 @@
 
 #define        BGBUFSIZE       (NBG * 2)       /* size of bigram buffer */
 
-u_char buf1[MAXPATHLEN] = " ";
-u_char buf2[MAXPATHLEN];
+u_char buf1[PATH_MAX] = " ";
+u_char buf2[PATH_MAX];
 u_char bigrams[BGBUFSIZE + 1] = { 0 };
 
 #define LOOKUP 1 /* use a lookup array instead a function, 3x faster */
@@ -171,6 +171,14 @@ main(int argc, char *argv[])
                        /* chop newline */
                        if (*cp == '\n')
                                *cp = '\0';
+               }
+
+               /* skip truncated lines */
+               if (cp == path + sizeof(buf2) - 1 && *(cp-1) != '\0') {
+                       while (fgets(path, sizeof(buf2), stdin) != NULL)
+                               if (strchr(path, '\n') != NULL)
+                                       break;
+                       continue;
                }
 
                /* Skip longest common prefix. */
Index: src/usr.bin/locate//locate/fastfind.c
===================================================================
RCS file: /cvs/src/usr.bin/locate/locate/fastfind.c,v
retrieving revision 1.11
diff -u -p -u -r1.11 fastfind.c
--- src/usr.bin/locate//locate/fastfind.c       25 Oct 2010 19:16:45 -0000      1.11
+++ src/usr.bin/locate//locate/fastfind.c       23 Nov 2014 16:49:47 -0000
@@ -47,7 +47,7 @@ statistic (fp, path_fcodes)
        u_char *p, *s;
        int c;
        int count, umlaut;
-       u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
+       u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX];
 
        for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
                p[c] = check_bigram_char(getc(fp));
@@ -140,7 +140,7 @@ fastfind
        int c, cc;
        int count, found, globflag;
        u_char *cutoff;
-       u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
+       u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX];
 
 #ifdef FF_ICASE
        /* use a lookup table for case insensitive search */
Index: src/usr.bin/locate//locate/locate.c
===================================================================
RCS file: /cvs/src/usr.bin/locate/locate/locate.c,v
retrieving revision 1.25
diff -u -p -u -r1.25 locate.c
--- src/usr.bin/locate//locate/locate.c 13 Apr 2012 15:13:07 -0000      1.25
+++ src/usr.bin/locate//locate/locate.c 23 Nov 2014 16:49:47 -0000
@@ -63,11 +63,11 @@
  * in the standard 'find'.
  */
 
-#include <sys/param.h>
 #include <ctype.h>
 #include <err.h>
 #include <fnmatch.h>
 #include <libgen.h>
+#include <limits.h>
 #include <locale.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -112,6 +112,7 @@ void    fastfind(FILE *, char *, char *)
 void    fastfind_icase(FILE *, char *, char *);
 void    fastfind_mmap(char *, caddr_t, int, char *);
 void    fastfind_mmap_icase(char *, caddr_t, int, char *);
+void    sane_count(int);
 void   search_mmap(char *, char **);
 void   search_fopen(char *, char **);
 unsigned long cputime(void);
@@ -334,7 +335,7 @@ usage(void)
 void
 sane_count(int count)
 {
-       if (count < 0 || count >= MAXPATHLEN) {
+       if (count < 0 || count >= PATH_MAX) {
                fprintf(stderr, "locate: corrupted database\n");
                exit(1);
        }
Index: src/usr.bin/locate//locate/util.c
===================================================================
RCS file: /cvs/src/usr.bin/locate/locate/util.c,v
retrieving revision 1.12
diff -u -p -u -r1.12 util.c
--- src/usr.bin/locate//locate/util.c   16 Nov 2014 00:04:53 -0000      1.12
+++ src/usr.bin/locate//locate/util.c   23 Nov 2014 16:49:47 -0000
@@ -35,11 +35,11 @@
  */
 
 
-#include <stdlib.h>
-#include <string.h>
 #include <err.h>
-#include <sys/param.h>
+#include <limits.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
 
 #include "locate.h"
 
@@ -242,12 +242,12 @@ getwm(p)
 
        i = u.i;
 
-       if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
+       if (i > PATH_MAX || i < -(PATH_MAX)) {
                i = ntohl(i);
-               if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
+               if (i > PATH_MAX || i < -(PATH_MAX)) {
                        (void)fprintf(stderr,
-                           "integer out of +-MAXPATHLEN (%d): %d\n",
-                           MAXPATHLEN, i);
+                           "integer out of +-PATH_MAX (%d): %d\n",
+                           PATH_MAX, i);
                        exit(1);
                }
        }
@@ -270,12 +270,12 @@ getwf(fp)
 
        word = getw(fp);
 
-       if (word > MAXPATHLEN || word < -(MAXPATHLEN)) {
+       if (word > PATH_MAX || word < -(PATH_MAX)) {
                word = ntohl(word);
-               if (word > MAXPATHLEN || word < -(MAXPATHLEN)) {
+               if (word > PATH_MAX || word < -(PATH_MAX)) {
                        (void)fprintf(stderr,
-                           "integer out of +-MAXPATHLEN (%d): %d\n",
-                           MAXPATHLEN, word);
+                           "integer out of +-PATH_MAX (%d): %d\n",
+                           PATH_MAX, word);
                        exit(1);
                }
        }

Reply via email to