Re: [Mesa-dev] [PATCH 1/3] util/disk_cache: use LRU eviction rather than random eviction (v2)
On Sat, 2017-03-11 at 17:08 +, Emil Velikov wrote: > On 10 March 2017 at 10:51, Grazvydas Ignotas> wrote: > > > > On Fri, Mar 10, 2017 at 6:23 AM, Timothy Arceri > com> wrote: > > > > > > On 07/03/17 12:25, Alan Swanson wrote: > > > > > > > > > > > > Still using random selection of two-character subdirectory in > > > > which > > > > to check cache files rather than scanning entire cache. > > > > > > > > v2: Factor out double strlen call > > > > --- > > > > src/util/disk_cache.c | 78 > > > > +++ > > > > 1 file changed, 35 insertions(+), 43 deletions(-) > > > > > > > > diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c > > > > index 31a9336582..2a923be3dc 100644 > > > > --- a/src/util/disk_cache.c > > > > +++ b/src/util/disk_cache.c [SNIP] > > > > char *filename; > > > > > > > > dir = opendir(dir_path); > > > > if (dir == NULL) > > > > return NULL; > > > > > > > > - count = 0; > > > > - > > > > - while (1) { > > > > - entry = readdir(dir); > > > > - if (entry == NULL) > > > > - break; > > > > - if (!predicate(entry, dir_path)) > > > > - continue; > > > > - > > > > - count++; > > > > - } > > > > - > > > > - if (count == 0) { > > > > - closedir(dir); > > > > - return NULL; > > > > - } > > > > - > > > > - victim = rand() % count; > > > > - > > > > - rewinddir(dir); > > > > - count = 0; > > > > - > > > > while (1) { > > > > entry = readdir(dir); > > > > if (entry == NULL) > > > > break; > > > > if (!predicate(entry, dir_path)) > > > > continue; > > > > - if (count == victim) > > > > - break; > > > > > > > > - count++; > > > > + if (fstatat(dirfd(dir), entry->d_name, , 0) == 0) { > > > > + if (!lru_atime || (sb.st_atime < lru_atime)) { > > > > +len = strlen(entry->d_name) + 1; > > > > +tmp = realloc(lru_name, len); > > > > +if (tmp) { > > > > + lru_name = tmp; > > > > > > > > > I don't think we need tmp here. Why not use lru_name directly? > > > > It *is* needed, otherwise if realloc fails, you'll leak old memory. > > > Indeed. Alternatively one can avoid all the strlen/realloc/free > dances > by using NAME_MAX. > > As per the POSIX manual > "The array d_name is of unspecified size, but shall contain a > filename > of at most {NAME_MAX} bytes followed by a terminating null byte." > > Not 100% sure if it'll play well with Hurd and similar platforms :-\ But here it's a very simple dance. The realloc should mostly become a noop, at least with glibc, after the first entry in cache and just activating for any .tmp files. Using MAX always seems like a lazy over allocation to me. A follow on patch will pass the sb and len to predicate functions instead saving them from calling fstat and strlen on same entry again. Will post that this weekend when I get back home. -- Alan. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] util/disk_cache: use LRU eviction rather than random eviction (v2)
On 10 March 2017 at 10:51, Grazvydas Ignotaswrote: > On Fri, Mar 10, 2017 at 6:23 AM, Timothy Arceri wrote: >> On 07/03/17 12:25, Alan Swanson wrote: >>> >>> Still using random selection of two-character subdirectory in which >>> to check cache files rather than scanning entire cache. >>> >>> v2: Factor out double strlen call >>> --- >>> src/util/disk_cache.c | 78 >>> +++ >>> 1 file changed, 35 insertions(+), 43 deletions(-) >>> >>> diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c >>> index 31a9336582..2a923be3dc 100644 >>> --- a/src/util/disk_cache.c >>> +++ b/src/util/disk_cache.c >>> @@ -438,70 +438,60 @@ make_cache_file_directory(struct disk_cache *cache, >>> const cache_key key) >>> free(dir); >>> } >>> >>> -/* Given a directory path and predicate function, count all entries in >>> - * that directory for which the predicate returns true. Then choose a >>> - * random entry from among those counted. >>> +/* Given a directory path and predicate function, find the entry with >>> + * the oldest access time in that directory for which the predicate >>> + * returns true. >>> * >>> * Returns: A malloc'ed string for the path to the chosen file, (or >>> * NULL on any error). The caller should free the string when >>> * finished. >>> */ >>> static char * >>> -choose_random_file_matching(const char *dir_path, >>> -bool (*predicate)(const struct dirent *, >>> - const char *dir_path)) >>> +choose_lru_file_matching(const char *dir_path, >>> + bool (*predicate)(const struct dirent *, >>> + const char *dir_path)) >>> { >>> DIR *dir; >>> struct dirent *entry; >>> - unsigned int count, victim; >>> + struct stat sb; >>> + char *tmp, *lru_name = NULL; >>> + size_t len; >>> + time_t lru_atime = 0; >> >> >> Please try to declare variables where they are used. The original version of >> this file was written before we could use C99 features in Mesa. >> >> >> >>> char *filename; >>> >>> dir = opendir(dir_path); >>> if (dir == NULL) >>>return NULL; >>> >>> - count = 0; >>> - >>> - while (1) { >>> - entry = readdir(dir); >>> - if (entry == NULL) >>> - break; >>> - if (!predicate(entry, dir_path)) >>> - continue; >>> - >>> - count++; >>> - } >>> - >>> - if (count == 0) { >>> - closedir(dir); >>> - return NULL; >>> - } >>> - >>> - victim = rand() % count; >>> - >>> - rewinddir(dir); >>> - count = 0; >>> - >>> while (1) { >>>entry = readdir(dir); >>>if (entry == NULL) >>> break; >>>if (!predicate(entry, dir_path)) >>> continue; >>> - if (count == victim) >>> - break; >>> >>> - count++; >>> + if (fstatat(dirfd(dir), entry->d_name, , 0) == 0) { >>> + if (!lru_atime || (sb.st_atime < lru_atime)) { >>> +len = strlen(entry->d_name) + 1; >>> +tmp = realloc(lru_name, len); >>> +if (tmp) { >>> + lru_name = tmp; >> >> >> I don't think we need tmp here. Why not use lru_name directly? > > It *is* needed, otherwise if realloc fails, you'll leak old memory. > Indeed. Alternatively one can avoid all the strlen/realloc/free dances by using NAME_MAX. As per the POSIX manual "The array d_name is of unspecified size, but shall contain a filename of at most {NAME_MAX} bytes followed by a terminating null byte." Not 100% sure if it'll play well with Hurd and similar platforms :-\ -Emil ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] util/disk_cache: use LRU eviction rather than random eviction (v2)
On Fri, Mar 10, 2017 at 6:23 AM, Timothy Arceriwrote: > On 07/03/17 12:25, Alan Swanson wrote: >> >> Still using random selection of two-character subdirectory in which >> to check cache files rather than scanning entire cache. >> >> v2: Factor out double strlen call >> --- >> src/util/disk_cache.c | 78 >> +++ >> 1 file changed, 35 insertions(+), 43 deletions(-) >> >> diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c >> index 31a9336582..2a923be3dc 100644 >> --- a/src/util/disk_cache.c >> +++ b/src/util/disk_cache.c >> @@ -438,70 +438,60 @@ make_cache_file_directory(struct disk_cache *cache, >> const cache_key key) >> free(dir); >> } >> >> -/* Given a directory path and predicate function, count all entries in >> - * that directory for which the predicate returns true. Then choose a >> - * random entry from among those counted. >> +/* Given a directory path and predicate function, find the entry with >> + * the oldest access time in that directory for which the predicate >> + * returns true. >> * >> * Returns: A malloc'ed string for the path to the chosen file, (or >> * NULL on any error). The caller should free the string when >> * finished. >> */ >> static char * >> -choose_random_file_matching(const char *dir_path, >> -bool (*predicate)(const struct dirent *, >> - const char *dir_path)) >> +choose_lru_file_matching(const char *dir_path, >> + bool (*predicate)(const struct dirent *, >> + const char *dir_path)) >> { >> DIR *dir; >> struct dirent *entry; >> - unsigned int count, victim; >> + struct stat sb; >> + char *tmp, *lru_name = NULL; >> + size_t len; >> + time_t lru_atime = 0; > > > Please try to declare variables where they are used. The original version of > this file was written before we could use C99 features in Mesa. > > > >> char *filename; >> >> dir = opendir(dir_path); >> if (dir == NULL) >>return NULL; >> >> - count = 0; >> - >> - while (1) { >> - entry = readdir(dir); >> - if (entry == NULL) >> - break; >> - if (!predicate(entry, dir_path)) >> - continue; >> - >> - count++; >> - } >> - >> - if (count == 0) { >> - closedir(dir); >> - return NULL; >> - } >> - >> - victim = rand() % count; >> - >> - rewinddir(dir); >> - count = 0; >> - >> while (1) { >>entry = readdir(dir); >>if (entry == NULL) >> break; >>if (!predicate(entry, dir_path)) >> continue; >> - if (count == victim) >> - break; >> >> - count++; >> + if (fstatat(dirfd(dir), entry->d_name, , 0) == 0) { >> + if (!lru_atime || (sb.st_atime < lru_atime)) { >> +len = strlen(entry->d_name) + 1; >> +tmp = realloc(lru_name, len); >> +if (tmp) { >> + lru_name = tmp; > > > I don't think we need tmp here. Why not use lru_name directly? It *is* needed, otherwise if realloc fails, you'll leak old memory. Gražvydas ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] util/disk_cache: use LRU eviction rather than random eviction (v2)
On 07/03/17 12:25, Alan Swanson wrote: Still using random selection of two-character subdirectory in which to check cache files rather than scanning entire cache. v2: Factor out double strlen call --- src/util/disk_cache.c | 78 +++ 1 file changed, 35 insertions(+), 43 deletions(-) diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c index 31a9336582..2a923be3dc 100644 --- a/src/util/disk_cache.c +++ b/src/util/disk_cache.c @@ -438,70 +438,60 @@ make_cache_file_directory(struct disk_cache *cache, const cache_key key) free(dir); } -/* Given a directory path and predicate function, count all entries in - * that directory for which the predicate returns true. Then choose a - * random entry from among those counted. +/* Given a directory path and predicate function, find the entry with + * the oldest access time in that directory for which the predicate + * returns true. * * Returns: A malloc'ed string for the path to the chosen file, (or * NULL on any error). The caller should free the string when * finished. */ static char * -choose_random_file_matching(const char *dir_path, -bool (*predicate)(const struct dirent *, - const char *dir_path)) +choose_lru_file_matching(const char *dir_path, + bool (*predicate)(const struct dirent *, + const char *dir_path)) { DIR *dir; struct dirent *entry; - unsigned int count, victim; + struct stat sb; + char *tmp, *lru_name = NULL; + size_t len; + time_t lru_atime = 0; Please try to declare variables where they are used. The original version of this file was written before we could use C99 features in Mesa. char *filename; dir = opendir(dir_path); if (dir == NULL) return NULL; - count = 0; - - while (1) { - entry = readdir(dir); - if (entry == NULL) - break; - if (!predicate(entry, dir_path)) - continue; - - count++; - } - - if (count == 0) { - closedir(dir); - return NULL; - } - - victim = rand() % count; - - rewinddir(dir); - count = 0; - while (1) { entry = readdir(dir); if (entry == NULL) break; if (!predicate(entry, dir_path)) continue; - if (count == victim) - break; - count++; + if (fstatat(dirfd(dir), entry->d_name, , 0) == 0) { + if (!lru_atime || (sb.st_atime < lru_atime)) { +len = strlen(entry->d_name) + 1; +tmp = realloc(lru_name, len); +if (tmp) { + lru_name = tmp; I don't think we need tmp here. Why not use lru_name directly? With these couple of nits addressed and the make check test updated for patch 2 this series looks good to me. Thanks for writing it. If you can send these updates I'll push it once we have moved this stuff off into a thread [1]. [1] https://lists.freedesktop.org/archives/mesa-dev/2017-March/147442.html + memcpy(lru_name, entry->d_name, len); + lru_atime = sb.st_atime; +} + } + } } - if (entry == NULL) { + if (lru_name == NULL) { closedir(dir); return NULL; } - if (asprintf(, "%s/%s", dir_path, entry->d_name) < 0) + if (asprintf(, "%s/%s", dir_path, lru_name) < 0) filename = NULL; + free(lru_name); closedir(dir); return filename; @@ -533,12 +523,12 @@ is_regular_non_tmp_file(const struct dirent *entry, const char *path) /* Returns the size of the deleted file, (or 0 on any error). */ static size_t -unlink_random_file_from_directory(const char *path) +unlink_lru_file_from_directory(const char *path) { struct stat sb; char *filename; - filename = choose_random_file_matching(path, is_regular_non_tmp_file); + filename = choose_lru_file_matching(path, is_regular_non_tmp_file); if (filename == NULL) return 0; @@ -581,7 +571,7 @@ is_two_character_sub_directory(const struct dirent *entry, const char *path) } static void -evict_random_item(struct disk_cache *cache) +evict_lru_item(struct disk_cache *cache) { const char hex[] = "0123456789abcde"; char *dir_path; @@ -591,6 +581,7 @@ evict_random_item(struct disk_cache *cache) /* With a reasonably-sized, full cache, (and with keys generated * from a cryptographic hash), we can choose two random hex digits * and reasonably expect the directory to exist with a file in it. +* Provides pseudo-LRU eviction to reduce checking all cache files. */ a = rand() % 16; b = rand() % 16; @@ -598,7 +589,7 @@ evict_random_item(struct disk_cache *cache) if (asprintf(_path, "%s/%c%c", cache->path, hex[a], hex[b]) < 0) return; - size = unlink_random_file_from_directory(dir_path); + size = unlink_lru_file_from_directory(dir_path); free(dir_path); @@ -608,18 +599,19 @@
[Mesa-dev] [PATCH 1/3] util/disk_cache: use LRU eviction rather than random eviction (v2)
Still using random selection of two-character subdirectory in which to check cache files rather than scanning entire cache. v2: Factor out double strlen call --- src/util/disk_cache.c | 78 +++ 1 file changed, 35 insertions(+), 43 deletions(-) diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c index 31a9336582..2a923be3dc 100644 --- a/src/util/disk_cache.c +++ b/src/util/disk_cache.c @@ -438,70 +438,60 @@ make_cache_file_directory(struct disk_cache *cache, const cache_key key) free(dir); } -/* Given a directory path and predicate function, count all entries in - * that directory for which the predicate returns true. Then choose a - * random entry from among those counted. +/* Given a directory path and predicate function, find the entry with + * the oldest access time in that directory for which the predicate + * returns true. * * Returns: A malloc'ed string for the path to the chosen file, (or * NULL on any error). The caller should free the string when * finished. */ static char * -choose_random_file_matching(const char *dir_path, -bool (*predicate)(const struct dirent *, - const char *dir_path)) +choose_lru_file_matching(const char *dir_path, + bool (*predicate)(const struct dirent *, + const char *dir_path)) { DIR *dir; struct dirent *entry; - unsigned int count, victim; + struct stat sb; + char *tmp, *lru_name = NULL; + size_t len; + time_t lru_atime = 0; char *filename; dir = opendir(dir_path); if (dir == NULL) return NULL; - count = 0; - - while (1) { - entry = readdir(dir); - if (entry == NULL) - break; - if (!predicate(entry, dir_path)) - continue; - - count++; - } - - if (count == 0) { - closedir(dir); - return NULL; - } - - victim = rand() % count; - - rewinddir(dir); - count = 0; - while (1) { entry = readdir(dir); if (entry == NULL) break; if (!predicate(entry, dir_path)) continue; - if (count == victim) - break; - count++; + if (fstatat(dirfd(dir), entry->d_name, , 0) == 0) { + if (!lru_atime || (sb.st_atime < lru_atime)) { +len = strlen(entry->d_name) + 1; +tmp = realloc(lru_name, len); +if (tmp) { + lru_name = tmp; + memcpy(lru_name, entry->d_name, len); + lru_atime = sb.st_atime; +} + } + } } - if (entry == NULL) { + if (lru_name == NULL) { closedir(dir); return NULL; } - if (asprintf(, "%s/%s", dir_path, entry->d_name) < 0) + if (asprintf(, "%s/%s", dir_path, lru_name) < 0) filename = NULL; + free(lru_name); closedir(dir); return filename; @@ -533,12 +523,12 @@ is_regular_non_tmp_file(const struct dirent *entry, const char *path) /* Returns the size of the deleted file, (or 0 on any error). */ static size_t -unlink_random_file_from_directory(const char *path) +unlink_lru_file_from_directory(const char *path) { struct stat sb; char *filename; - filename = choose_random_file_matching(path, is_regular_non_tmp_file); + filename = choose_lru_file_matching(path, is_regular_non_tmp_file); if (filename == NULL) return 0; @@ -581,7 +571,7 @@ is_two_character_sub_directory(const struct dirent *entry, const char *path) } static void -evict_random_item(struct disk_cache *cache) +evict_lru_item(struct disk_cache *cache) { const char hex[] = "0123456789abcde"; char *dir_path; @@ -591,6 +581,7 @@ evict_random_item(struct disk_cache *cache) /* With a reasonably-sized, full cache, (and with keys generated * from a cryptographic hash), we can choose two random hex digits * and reasonably expect the directory to exist with a file in it. +* Provides pseudo-LRU eviction to reduce checking all cache files. */ a = rand() % 16; b = rand() % 16; @@ -598,7 +589,7 @@ evict_random_item(struct disk_cache *cache) if (asprintf(_path, "%s/%c%c", cache->path, hex[a], hex[b]) < 0) return; - size = unlink_random_file_from_directory(dir_path); + size = unlink_lru_file_from_directory(dir_path); free(dir_path); @@ -608,18 +599,19 @@ evict_random_item(struct disk_cache *cache) } /* In the case where the random choice of directory didn't find -* something, we choose randomly from the existing directories. +* something, we choose the least recently accessed from the +* existing directories. * * Really, the only reason this code exists is to allow the unit * tests to work, (which use an artificially-small cache to be able * to force a single cached item to be evicted). */ - dir_path = choose_random_file_matching(cache->path, -