Re: [Mesa-dev] [PATCH 1/3] util/disk_cache: use LRU eviction rather than random eviction (v2)

2017-03-11 Thread Alan Swanson
On Sat, 2017-03-11 at 17:08 +, Emil Velikov wrote:
> On 10 March 2017 at 10:51, Grazvydas Ignotas 
> wrote:
> > 
> > On Fri, Mar 10, 2017 at 6:23 AM, Timothy Arceri  > com> wrote:
> > > 
> > > On 07/03/17 12:25, Alan Swanson wrote:
> > > > 
> > > > 
> > > > Still using random selection of two-character subdirectory in
> > > > which
> > > > to check cache files rather than scanning entire cache.
> > > > 
> > > > v2: Factor out double strlen call
> > > > ---
> > > >  src/util/disk_cache.c | 78
> > > > +++
> > > >  1 file changed, 35 insertions(+), 43 deletions(-)
> > > > 
> > > > diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
> > > > index 31a9336582..2a923be3dc 100644
> > > > --- a/src/util/disk_cache.c
> > > > +++ b/src/util/disk_cache.c

[SNIP]

> > > > char *filename;
> > > > 
> > > > dir = opendir(dir_path);
> > > > if (dir == NULL)
> > > >    return NULL;
> > > > 
> > > > -   count = 0;
> > > > -
> > > > -   while (1) {
> > > > -  entry = readdir(dir);
> > > > -  if (entry == NULL)
> > > > - break;
> > > > -  if (!predicate(entry, dir_path))
> > > > - continue;
> > > > -
> > > > -  count++;
> > > > -   }
> > > > -
> > > > -   if (count == 0) {
> > > > -  closedir(dir);
> > > > -  return NULL;
> > > > -   }
> > > > -
> > > > -   victim = rand() % count;
> > > > -
> > > > -   rewinddir(dir);
> > > > -   count = 0;
> > > > -
> > > > while (1) {
> > > >    entry = readdir(dir);
> > > >    if (entry == NULL)
> > > >   break;
> > > >    if (!predicate(entry, dir_path))
> > > >   continue;
> > > > -  if (count == victim)
> > > > - break;
> > > > 
> > > > -  count++;
> > > > +  if (fstatat(dirfd(dir), entry->d_name, , 0) == 0) {
> > > > + if (!lru_atime || (sb.st_atime < lru_atime)) {
> > > > +len = strlen(entry->d_name) + 1;
> > > > +tmp = realloc(lru_name, len);
> > > > +if (tmp) {
> > > > +   lru_name = tmp;
> > > 
> > > 
> > > I don't think we need tmp here. Why not use lru_name directly?
> > 
> > It *is* needed, otherwise if realloc fails, you'll leak old memory.
> > 
> Indeed. Alternatively one can avoid all the strlen/realloc/free
> dances
> by using NAME_MAX.
> 
> As per the POSIX manual
> "The array d_name is of unspecified size, but shall contain a
> filename
> of at most {NAME_MAX} bytes followed by a terminating null byte."
> 
> Not 100% sure if it'll play well with Hurd and similar platforms :-\

But here it's a very simple dance. The realloc should mostly become a
noop, at least with glibc, after the first entry in cache and just
activating for any .tmp files. Using MAX always seems like a lazy over
allocation to me.

A follow on patch will pass the sb and len to predicate functions
instead saving them from calling fstat and strlen on same entry again.
Will post that this weekend when I get back home.

-- 
Alan.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] util/disk_cache: use LRU eviction rather than random eviction (v2)

2017-03-11 Thread Emil Velikov
On 10 March 2017 at 10:51, Grazvydas Ignotas  wrote:
> On Fri, Mar 10, 2017 at 6:23 AM, Timothy Arceri  wrote:
>> On 07/03/17 12:25, Alan Swanson wrote:
>>>
>>> Still using random selection of two-character subdirectory in which
>>> to check cache files rather than scanning entire cache.
>>>
>>> v2: Factor out double strlen call
>>> ---
>>>  src/util/disk_cache.c | 78
>>> +++
>>>  1 file changed, 35 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
>>> index 31a9336582..2a923be3dc 100644
>>> --- a/src/util/disk_cache.c
>>> +++ b/src/util/disk_cache.c
>>> @@ -438,70 +438,60 @@ make_cache_file_directory(struct disk_cache *cache,
>>> const cache_key key)
>>> free(dir);
>>>  }
>>>
>>> -/* Given a directory path and predicate function, count all entries in
>>> - * that directory for which the predicate returns true. Then choose a
>>> - * random entry from among those counted.
>>> +/* Given a directory path and predicate function, find the entry with
>>> + * the oldest access time in that directory for which the predicate
>>> + * returns true.
>>>   *
>>>   * Returns: A malloc'ed string for the path to the chosen file, (or
>>>   * NULL on any error). The caller should free the string when
>>>   * finished.
>>>   */
>>>  static char *
>>> -choose_random_file_matching(const char *dir_path,
>>> -bool (*predicate)(const struct dirent *,
>>> -  const char *dir_path))
>>> +choose_lru_file_matching(const char *dir_path,
>>> + bool (*predicate)(const struct dirent *,
>>> +   const char *dir_path))
>>>  {
>>> DIR *dir;
>>> struct dirent *entry;
>>> -   unsigned int count, victim;
>>> +   struct stat sb;
>>> +   char *tmp, *lru_name = NULL;
>>> +   size_t len;
>>> +   time_t lru_atime = 0;
>>
>>
>> Please try to declare variables where they are used. The original version of
>> this file was written before we could use C99 features in Mesa.
>>
>>
>>
>>> char *filename;
>>>
>>> dir = opendir(dir_path);
>>> if (dir == NULL)
>>>return NULL;
>>>
>>> -   count = 0;
>>> -
>>> -   while (1) {
>>> -  entry = readdir(dir);
>>> -  if (entry == NULL)
>>> - break;
>>> -  if (!predicate(entry, dir_path))
>>> - continue;
>>> -
>>> -  count++;
>>> -   }
>>> -
>>> -   if (count == 0) {
>>> -  closedir(dir);
>>> -  return NULL;
>>> -   }
>>> -
>>> -   victim = rand() % count;
>>> -
>>> -   rewinddir(dir);
>>> -   count = 0;
>>> -
>>> while (1) {
>>>entry = readdir(dir);
>>>if (entry == NULL)
>>>   break;
>>>if (!predicate(entry, dir_path))
>>>   continue;
>>> -  if (count == victim)
>>> - break;
>>>
>>> -  count++;
>>> +  if (fstatat(dirfd(dir), entry->d_name, , 0) == 0) {
>>> + if (!lru_atime || (sb.st_atime < lru_atime)) {
>>> +len = strlen(entry->d_name) + 1;
>>> +tmp = realloc(lru_name, len);
>>> +if (tmp) {
>>> +   lru_name = tmp;
>>
>>
>> I don't think we need tmp here. Why not use lru_name directly?
>
> It *is* needed, otherwise if realloc fails, you'll leak old memory.
>
Indeed. Alternatively one can avoid all the strlen/realloc/free dances
by using NAME_MAX.

As per the POSIX manual
"The array d_name is of unspecified size, but shall contain a filename
of at most {NAME_MAX} bytes followed by a terminating null byte."

Not 100% sure if it'll play well with Hurd and similar platforms :-\

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] util/disk_cache: use LRU eviction rather than random eviction (v2)

2017-03-10 Thread Grazvydas Ignotas
On Fri, Mar 10, 2017 at 6:23 AM, Timothy Arceri  wrote:
> On 07/03/17 12:25, Alan Swanson wrote:
>>
>> Still using random selection of two-character subdirectory in which
>> to check cache files rather than scanning entire cache.
>>
>> v2: Factor out double strlen call
>> ---
>>  src/util/disk_cache.c | 78
>> +++
>>  1 file changed, 35 insertions(+), 43 deletions(-)
>>
>> diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
>> index 31a9336582..2a923be3dc 100644
>> --- a/src/util/disk_cache.c
>> +++ b/src/util/disk_cache.c
>> @@ -438,70 +438,60 @@ make_cache_file_directory(struct disk_cache *cache,
>> const cache_key key)
>> free(dir);
>>  }
>>
>> -/* Given a directory path and predicate function, count all entries in
>> - * that directory for which the predicate returns true. Then choose a
>> - * random entry from among those counted.
>> +/* Given a directory path and predicate function, find the entry with
>> + * the oldest access time in that directory for which the predicate
>> + * returns true.
>>   *
>>   * Returns: A malloc'ed string for the path to the chosen file, (or
>>   * NULL on any error). The caller should free the string when
>>   * finished.
>>   */
>>  static char *
>> -choose_random_file_matching(const char *dir_path,
>> -bool (*predicate)(const struct dirent *,
>> -  const char *dir_path))
>> +choose_lru_file_matching(const char *dir_path,
>> + bool (*predicate)(const struct dirent *,
>> +   const char *dir_path))
>>  {
>> DIR *dir;
>> struct dirent *entry;
>> -   unsigned int count, victim;
>> +   struct stat sb;
>> +   char *tmp, *lru_name = NULL;
>> +   size_t len;
>> +   time_t lru_atime = 0;
>
>
> Please try to declare variables where they are used. The original version of
> this file was written before we could use C99 features in Mesa.
>
>
>
>> char *filename;
>>
>> dir = opendir(dir_path);
>> if (dir == NULL)
>>return NULL;
>>
>> -   count = 0;
>> -
>> -   while (1) {
>> -  entry = readdir(dir);
>> -  if (entry == NULL)
>> - break;
>> -  if (!predicate(entry, dir_path))
>> - continue;
>> -
>> -  count++;
>> -   }
>> -
>> -   if (count == 0) {
>> -  closedir(dir);
>> -  return NULL;
>> -   }
>> -
>> -   victim = rand() % count;
>> -
>> -   rewinddir(dir);
>> -   count = 0;
>> -
>> while (1) {
>>entry = readdir(dir);
>>if (entry == NULL)
>>   break;
>>if (!predicate(entry, dir_path))
>>   continue;
>> -  if (count == victim)
>> - break;
>>
>> -  count++;
>> +  if (fstatat(dirfd(dir), entry->d_name, , 0) == 0) {
>> + if (!lru_atime || (sb.st_atime < lru_atime)) {
>> +len = strlen(entry->d_name) + 1;
>> +tmp = realloc(lru_name, len);
>> +if (tmp) {
>> +   lru_name = tmp;
>
>
> I don't think we need tmp here. Why not use lru_name directly?

It *is* needed, otherwise if realloc fails, you'll leak old memory.

Gražvydas
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] util/disk_cache: use LRU eviction rather than random eviction (v2)

2017-03-09 Thread Timothy Arceri

On 07/03/17 12:25, Alan Swanson wrote:

Still using random selection of two-character subdirectory in which
to check cache files rather than scanning entire cache.

v2: Factor out double strlen call
---
 src/util/disk_cache.c | 78 +++
 1 file changed, 35 insertions(+), 43 deletions(-)

diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 31a9336582..2a923be3dc 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -438,70 +438,60 @@ make_cache_file_directory(struct disk_cache *cache, const 
cache_key key)
free(dir);
 }

-/* Given a directory path and predicate function, count all entries in
- * that directory for which the predicate returns true. Then choose a
- * random entry from among those counted.
+/* Given a directory path and predicate function, find the entry with
+ * the oldest access time in that directory for which the predicate
+ * returns true.
  *
  * Returns: A malloc'ed string for the path to the chosen file, (or
  * NULL on any error). The caller should free the string when
  * finished.
  */
 static char *
-choose_random_file_matching(const char *dir_path,
-bool (*predicate)(const struct dirent *,
-  const char *dir_path))
+choose_lru_file_matching(const char *dir_path,
+ bool (*predicate)(const struct dirent *,
+   const char *dir_path))
 {
DIR *dir;
struct dirent *entry;
-   unsigned int count, victim;
+   struct stat sb;
+   char *tmp, *lru_name = NULL;
+   size_t len;
+   time_t lru_atime = 0;


Please try to declare variables where they are used. The original 
version of this file was written before we could use C99 features in Mesa.




char *filename;

dir = opendir(dir_path);
if (dir == NULL)
   return NULL;

-   count = 0;
-
-   while (1) {
-  entry = readdir(dir);
-  if (entry == NULL)
- break;
-  if (!predicate(entry, dir_path))
- continue;
-
-  count++;
-   }
-
-   if (count == 0) {
-  closedir(dir);
-  return NULL;
-   }
-
-   victim = rand() % count;
-
-   rewinddir(dir);
-   count = 0;
-
while (1) {
   entry = readdir(dir);
   if (entry == NULL)
  break;
   if (!predicate(entry, dir_path))
  continue;
-  if (count == victim)
- break;

-  count++;
+  if (fstatat(dirfd(dir), entry->d_name, , 0) == 0) {
+ if (!lru_atime || (sb.st_atime < lru_atime)) {
+len = strlen(entry->d_name) + 1;
+tmp = realloc(lru_name, len);
+if (tmp) {
+   lru_name = tmp;


I don't think we need tmp here. Why not use lru_name directly?

With these couple of nits addressed and the make check test updated for 
patch 2 this series looks good to me. Thanks for writing it.


If you can send these updates I'll push it once we have moved this stuff 
off into a thread [1].


[1] https://lists.freedesktop.org/archives/mesa-dev/2017-March/147442.html


+   memcpy(lru_name, entry->d_name, len);
+   lru_atime = sb.st_atime;
+}
+ }
+  }
}

-   if (entry == NULL) {
+   if (lru_name == NULL) {
   closedir(dir);
   return NULL;
}

-   if (asprintf(, "%s/%s", dir_path, entry->d_name) < 0)
+   if (asprintf(, "%s/%s", dir_path, lru_name) < 0)
   filename = NULL;

+   free(lru_name);
closedir(dir);

return filename;
@@ -533,12 +523,12 @@ is_regular_non_tmp_file(const struct dirent *entry, const 
char *path)

 /* Returns the size of the deleted file, (or 0 on any error). */
 static size_t
-unlink_random_file_from_directory(const char *path)
+unlink_lru_file_from_directory(const char *path)
 {
struct stat sb;
char *filename;

-   filename = choose_random_file_matching(path, is_regular_non_tmp_file);
+   filename = choose_lru_file_matching(path, is_regular_non_tmp_file);
if (filename == NULL)
   return 0;

@@ -581,7 +571,7 @@ is_two_character_sub_directory(const struct dirent *entry, 
const char *path)
 }

 static void
-evict_random_item(struct disk_cache *cache)
+evict_lru_item(struct disk_cache *cache)
 {
const char hex[] = "0123456789abcde";
char *dir_path;
@@ -591,6 +581,7 @@ evict_random_item(struct disk_cache *cache)
/* With a reasonably-sized, full cache, (and with keys generated
 * from a cryptographic hash), we can choose two random hex digits
 * and reasonably expect the directory to exist with a file in it.
+* Provides pseudo-LRU eviction to reduce checking all cache files.
 */
a = rand() % 16;
b = rand() % 16;
@@ -598,7 +589,7 @@ evict_random_item(struct disk_cache *cache)
if (asprintf(_path, "%s/%c%c", cache->path, hex[a], hex[b]) < 0)
   return;

-   size = unlink_random_file_from_directory(dir_path);
+   size = unlink_lru_file_from_directory(dir_path);

free(dir_path);

@@ -608,18 +599,19 @@ 

[Mesa-dev] [PATCH 1/3] util/disk_cache: use LRU eviction rather than random eviction (v2)

2017-03-06 Thread Alan Swanson
Still using random selection of two-character subdirectory in which
to check cache files rather than scanning entire cache.

v2: Factor out double strlen call
---
 src/util/disk_cache.c | 78 +++
 1 file changed, 35 insertions(+), 43 deletions(-)

diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 31a9336582..2a923be3dc 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -438,70 +438,60 @@ make_cache_file_directory(struct disk_cache *cache, const 
cache_key key)
free(dir);
 }
 
-/* Given a directory path and predicate function, count all entries in
- * that directory for which the predicate returns true. Then choose a
- * random entry from among those counted.
+/* Given a directory path and predicate function, find the entry with
+ * the oldest access time in that directory for which the predicate
+ * returns true.
  *
  * Returns: A malloc'ed string for the path to the chosen file, (or
  * NULL on any error). The caller should free the string when
  * finished.
  */
 static char *
-choose_random_file_matching(const char *dir_path,
-bool (*predicate)(const struct dirent *,
-  const char *dir_path))
+choose_lru_file_matching(const char *dir_path,
+ bool (*predicate)(const struct dirent *,
+   const char *dir_path))
 {
DIR *dir;
struct dirent *entry;
-   unsigned int count, victim;
+   struct stat sb;
+   char *tmp, *lru_name = NULL;
+   size_t len;
+   time_t lru_atime = 0;
char *filename;
 
dir = opendir(dir_path);
if (dir == NULL)
   return NULL;
 
-   count = 0;
-
-   while (1) {
-  entry = readdir(dir);
-  if (entry == NULL)
- break;
-  if (!predicate(entry, dir_path))
- continue;
-
-  count++;
-   }
-
-   if (count == 0) {
-  closedir(dir);
-  return NULL;
-   }
-
-   victim = rand() % count;
-
-   rewinddir(dir);
-   count = 0;
-
while (1) {
   entry = readdir(dir);
   if (entry == NULL)
  break;
   if (!predicate(entry, dir_path))
  continue;
-  if (count == victim)
- break;
 
-  count++;
+  if (fstatat(dirfd(dir), entry->d_name, , 0) == 0) {
+ if (!lru_atime || (sb.st_atime < lru_atime)) {
+len = strlen(entry->d_name) + 1;
+tmp = realloc(lru_name, len);
+if (tmp) {
+   lru_name = tmp;
+   memcpy(lru_name, entry->d_name, len);
+   lru_atime = sb.st_atime;
+}
+ }
+  }
}
 
-   if (entry == NULL) {
+   if (lru_name == NULL) {
   closedir(dir);
   return NULL;
}
 
-   if (asprintf(, "%s/%s", dir_path, entry->d_name) < 0)
+   if (asprintf(, "%s/%s", dir_path, lru_name) < 0)
   filename = NULL;
 
+   free(lru_name);
closedir(dir);
 
return filename;
@@ -533,12 +523,12 @@ is_regular_non_tmp_file(const struct dirent *entry, const 
char *path)
 
 /* Returns the size of the deleted file, (or 0 on any error). */
 static size_t
-unlink_random_file_from_directory(const char *path)
+unlink_lru_file_from_directory(const char *path)
 {
struct stat sb;
char *filename;
 
-   filename = choose_random_file_matching(path, is_regular_non_tmp_file);
+   filename = choose_lru_file_matching(path, is_regular_non_tmp_file);
if (filename == NULL)
   return 0;
 
@@ -581,7 +571,7 @@ is_two_character_sub_directory(const struct dirent *entry, 
const char *path)
 }
 
 static void
-evict_random_item(struct disk_cache *cache)
+evict_lru_item(struct disk_cache *cache)
 {
const char hex[] = "0123456789abcde";
char *dir_path;
@@ -591,6 +581,7 @@ evict_random_item(struct disk_cache *cache)
/* With a reasonably-sized, full cache, (and with keys generated
 * from a cryptographic hash), we can choose two random hex digits
 * and reasonably expect the directory to exist with a file in it.
+* Provides pseudo-LRU eviction to reduce checking all cache files.
 */
a = rand() % 16;
b = rand() % 16;
@@ -598,7 +589,7 @@ evict_random_item(struct disk_cache *cache)
if (asprintf(_path, "%s/%c%c", cache->path, hex[a], hex[b]) < 0)
   return;
 
-   size = unlink_random_file_from_directory(dir_path);
+   size = unlink_lru_file_from_directory(dir_path);
 
free(dir_path);
 
@@ -608,18 +599,19 @@ evict_random_item(struct disk_cache *cache)
}
 
/* In the case where the random choice of directory didn't find
-* something, we choose randomly from the existing directories.
+* something, we choose the least recently accessed from the
+* existing directories.
 *
 * Really, the only reason this code exists is to allow the unit
 * tests to work, (which use an artificially-small cache to be able
 * to force a single cached item to be evicted).
 */
-   dir_path = choose_random_file_matching(cache->path,
-