Hello,
locate(1) doesn't handle paths longer than MAXPATHLEN well: they are
silently split into MAXPATHLEN-long strings and each string is included
in the database as a separate entry. Here is an example:
$ cat loc_test.sh
testdir='/tmp/locate_maxpathlen'
db="$testdir/db"
d='64_characters_long_directory_name_______________________________'
long_path="$testdir/$d/$d/$d/$d/$d/$d/$d/$d/$d/$d/$d/$d/$d/$d/$d"
mkdir -p $long_path || exit 1
cd "$long_path"
touch short_filename
touch long_filename_which_gets_truncated
/usr/libexec/locate.updatedb --fcodes="$db" \
--searchpaths="$testdir /usr/src" \
|| exit 1
locate -d "$db" filename truncated | grep -v "^/usr/src"
$ sh loc_test.sh
/tmp/locate_maxpathlen/64_char_[... SNIP ...]_/long_filename_which_gets_
/tmp/locate_maxpathlen/64_char_[... SNIP ...]_/short_filename
truncated
We have just created the path "truncated" in the locate database.
The diff below includes the following changes:
- locate(1) now silently ignores paths longer than MAXPATHLEN
(locate.code skips them when the database is built)
- MAXPATHLEN from <sys/param.h> is replaced by PATH_MAX from
<limits.h>
- add a missing prototype for sane_count()
Another solution would be to modify mklocatedb.sh or updatedb.sh to
filter out those overly long paths before they are fed to locate.code, but I
can't see a way to achieve this that doesn't require hardcoding PATH_MAX.
I ran /etc/weekly with the new binaries and didn't notice any
unexpected changes in the locate database.
Nicolas Bedos
Index: src/usr.bin/locate//bigram/locate.bigram.c
===================================================================
RCS file: /cvs/src/usr.bin/locate/bigram/locate.bigram.c,v
retrieving revision 1.12
diff -u -p -u -r1.12 locate.bigram.c
--- src/usr.bin/locate//bigram/locate.bigram.c 27 Oct 2009 23:59:39 -0000 1.12
+++ src/usr.bin/locate//bigram/locate.bigram.c 23 Nov 2014 16:49:47 -0000
@@ -43,13 +43,13 @@
* Use 'code' to encode a file using this output.
*/
+#include <limits.h> /* for PATH_MAX */
#include <stdio.h>
#include <stdlib.h>
-#include <sys/param.h> /* for MAXPATHLEN */
#include "locate.h"
-u_char buf1[MAXPATHLEN] = " ";
-u_char buf2[MAXPATHLEN];
+u_char buf1[PATH_MAX] = " ";
+u_char buf2[PATH_MAX];
u_int bigram[UCHAR_MAX + 1][UCHAR_MAX + 1];
int
Index: src/usr.bin/locate//code/locate.code.c
===================================================================
RCS file: /cvs/src/usr.bin/locate/code/locate.code.c,v
retrieving revision 1.17
diff -u -p -u -r1.17 locate.code.c
--- src/usr.bin/locate//code/locate.code.c 17 Nov 2013 20:19:36 -0000 1.17
+++ src/usr.bin/locate//code/locate.code.c 23 Nov 2014 16:49:47 -0000
@@ -78,10 +78,10 @@
* Wolfram Schneider, Berlin September 1996
*/
-#include <sys/param.h>
#include <err.h>
#include <errno.h>
+#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -91,8 +91,8 @@
#define BGBUFSIZE (NBG * 2) /* size of bigram buffer */
-u_char buf1[MAXPATHLEN] = " ";
-u_char buf2[MAXPATHLEN];
+u_char buf1[PATH_MAX] = " ";
+u_char buf2[PATH_MAX];
u_char bigrams[BGBUFSIZE + 1] = { 0 };
#define LOOKUP 1 /* use a lookup array instead a function, 3x faster */
@@ -171,6 +171,14 @@ main(int argc, char *argv[])
/* chop newline */
if (*cp == '\n')
*cp = '\0';
+ }
+
+ /* skip truncated lines */
+ if (cp == path + sizeof(buf2) - 1 && *(cp-1) != '\0') {
+ while (fgets(path, sizeof(buf2), stdin) != NULL)
+ if (strchr(path, '\n') != NULL)
+ break;
+ continue;
}
/* Skip longest common prefix. */
Index: src/usr.bin/locate//locate/fastfind.c
===================================================================
RCS file: /cvs/src/usr.bin/locate/locate/fastfind.c,v
retrieving revision 1.11
diff -u -p -u -r1.11 fastfind.c
--- src/usr.bin/locate//locate/fastfind.c 25 Oct 2010 19:16:45 -0000 1.11
+++ src/usr.bin/locate//locate/fastfind.c 23 Nov 2014 16:49:47 -0000
@@ -47,7 +47,7 @@ statistic (fp, path_fcodes)
u_char *p, *s;
int c;
int count, umlaut;
- u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
+ u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX];
for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
p[c] = check_bigram_char(getc(fp));
@@ -140,7 +140,7 @@ fastfind
int c, cc;
int count, found, globflag;
u_char *cutoff;
- u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
+ u_char bigram1[NBG], bigram2[NBG], path[PATH_MAX];
#ifdef FF_ICASE
/* use a lookup table for case insensitive search */
Index: src/usr.bin/locate//locate/locate.c
===================================================================
RCS file: /cvs/src/usr.bin/locate/locate/locate.c,v
retrieving revision 1.25
diff -u -p -u -r1.25 locate.c
--- src/usr.bin/locate//locate/locate.c 13 Apr 2012 15:13:07 -0000 1.25
+++ src/usr.bin/locate//locate/locate.c 23 Nov 2014 16:49:47 -0000
@@ -63,11 +63,11 @@
* in the standard 'find'.
*/
-#include <sys/param.h>
#include <ctype.h>
#include <err.h>
#include <fnmatch.h>
#include <libgen.h>
+#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
@@ -112,6 +112,7 @@ void fastfind(FILE *, char *, char *)
void fastfind_icase(FILE *, char *, char *);
void fastfind_mmap(char *, caddr_t, int, char *);
void fastfind_mmap_icase(char *, caddr_t, int, char *);
+void sane_count(int);
void search_mmap(char *, char **);
void search_fopen(char *, char **);
unsigned long cputime(void);
@@ -334,7 +335,7 @@ usage(void)
void
sane_count(int count)
{
- if (count < 0 || count >= MAXPATHLEN) {
+ if (count < 0 || count >= PATH_MAX) {
fprintf(stderr, "locate: corrupted database\n");
exit(1);
}
Index: src/usr.bin/locate//locate/util.c
===================================================================
RCS file: /cvs/src/usr.bin/locate/locate/util.c,v
retrieving revision 1.12
diff -u -p -u -r1.12 util.c
--- src/usr.bin/locate//locate/util.c 16 Nov 2014 00:04:53 -0000 1.12
+++ src/usr.bin/locate//locate/util.c 23 Nov 2014 16:49:47 -0000
@@ -35,11 +35,11 @@
*/
-#include <stdlib.h>
-#include <string.h>
#include <err.h>
-#include <sys/param.h>
+#include <limits.h>
#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
#include "locate.h"
@@ -242,12 +242,12 @@ getwm(p)
i = u.i;
- if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
+ if (i > PATH_MAX || i < -(PATH_MAX)) {
i = ntohl(i);
- if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
+ if (i > PATH_MAX || i < -(PATH_MAX)) {
(void)fprintf(stderr,
- "integer out of +-MAXPATHLEN (%d): %d\n",
- MAXPATHLEN, i);
+ "integer out of +-PATH_MAX (%d): %d\n",
+ PATH_MAX, i);
exit(1);
}
}
@@ -270,12 +270,12 @@ getwf(fp)
word = getw(fp);
- if (word > MAXPATHLEN || word < -(MAXPATHLEN)) {
+ if (word > PATH_MAX || word < -(PATH_MAX)) {
word = ntohl(word);
- if (word > MAXPATHLEN || word < -(MAXPATHLEN)) {
+ if (word > PATH_MAX || word < -(PATH_MAX)) {
(void)fprintf(stderr,
- "integer out of +-MAXPATHLEN (%d): %d\n",
- MAXPATHLEN, word);
+ "integer out of +-PATH_MAX (%d): %d\n",
+ PATH_MAX, word);
exit(1);
}
}