IMPALA-6638: Reduce file handle cache lock contention

FileHandleCache::OpenFileHandle() currently holds the
lock while opening a file handle. This lengthens the
duration holding the lock considerably, causing
contention when a lot of file handles are being
opened (i.e. when the cache is cold).

This changes FileHandleCache::OpenFileHandle() drops
the lock while opening the file handle, then
reacquires it to add the file handle to the cache.

When running a simple select on a table with 46801
Parquet files, this fixes a small performance overhead
for the cold cache case compared to having the
cache off. All results are averaged over 5 runs:

File handle cache off: 7.19s
File handle cache cold (no patch): 7.92s
File handle cache cold (with patch): 7.20s

When running a select that accesses and aggregates
4 columns from the same Parquet table (thus requiring
more scan ranges for the same file), the fix reduces
contention, particularly for the case with 8 IO threads
per disk:

1 IO thread per disk:
Without patch: 9.59s
With patch: 8.11s

8 IO threads per disk:
Without patch: 14.2s
With patch: 8.17s

The patch has no impact on the performance when the
cache is hot.

Change-Id: I4c695b21ca556e9c73c703c0c891e64939271c8d
Reviewed-on: http://gerrit.cloudera.org:8080/9576
Reviewed-by: Joe McDonnell <joemcdonn...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/6d3fdee3
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/6d3fdee3
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/6d3fdee3

Branch: refs/heads/2.x
Commit: 6d3fdee3f6c80675e04d350e21a0dd99c8ff2a81
Parents: 2ff9623
Author: Joe McDonnell <joemcdonn...@cloudera.com>
Authored: Sat Mar 10 15:09:58 2018 -0800
Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org>
Committed: Tue Mar 13 05:33:26 2018 +0000

----------------------------------------------------------------------
 be/src/runtime/io/handle-cache.inline.h | 61 +++++++++++++++++-----------
 1 file changed, 37 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/6d3fdee3/be/src/runtime/io/handle-cache.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/io/handle-cache.inline.h 
b/be/src/runtime/io/handle-cache.inline.h
index f0cced0..99b106c 100644
--- a/be/src/runtime/io/handle-cache.inline.h
+++ b/be/src/runtime/io/handle-cache.inline.h
@@ -86,16 +86,22 @@ CachedHdfsFileHandle* FileHandleCache::GetFileHandle(
   // Hash the key and get appropriate partition
   int index = HashUtil::Hash(fname->data(), fname->size(), 0) % 
cache_partitions_.size();
   FileHandleCachePartition& p = cache_partitions_[index];
-  boost::lock_guard<SpinLock> g(p.lock);
-  pair<typename MapType::iterator, typename MapType::iterator> range =
-    p.cache.equal_range(*fname);
 
   // If this requires a new handle, skip to the creation codepath. Otherwise,
   // find an unused entry with the same mtime
-  FileHandleEntry* ret_elem = nullptr;
   if (!require_new_handle) {
+    boost::lock_guard<SpinLock> g(p.lock);
+    pair<typename MapType::iterator, typename MapType::iterator> range =
+      p.cache.equal_range(*fname);
+
+    // When picking a cached entry, always follow the ordering of the map and
+    // pick earlier entries first. This allows excessive entries for a file
+    // to age out. For example, if there are three entries for a file and only
+    // one is used at a time, only the first will be used and the other two
+    // can age out.
     while (range.first != range.second) {
       FileHandleEntry* elem = &range.first->second;
+      DCHECK(elem->fh.get() != nullptr);
       if (!elem->in_use && elem->fh->mtime() == mtime) {
         // This element is currently in the lru_list, which means that 
lru_entry must
         // be an iterator pointing into the lru_list.
@@ -104,35 +110,42 @@ CachedHdfsFileHandle* FileHandleCache::GetFileHandle(
         // the lru_list by resetting its iterator to point to the end of the 
list.
         p.lru_list.erase(elem->lru_entry);
         elem->lru_entry = p.lru_list.end();
-        ret_elem = elem;
         *cache_hit = true;
-        break;
+        elem->in_use = true;
+        return elem->fh.get();
       }
       ++range.first;
     }
   }
 
   // There was no entry that was free or caller asked for a new handle
-  if (!ret_elem) {
-    *cache_hit = false;
-    // Create a new entry and move it into the map
-    CachedHdfsFileHandle* new_fh = new CachedHdfsFileHandle(fs, fname->data(), 
mtime);
-    if (!new_fh->ok()) {
-      delete new_fh;
-      return nullptr;
-    }
-    FileHandleEntry entry(new_fh, p.lru_list);
-    typename MapType::iterator new_it = p.cache.emplace_hint(range.second,
-        *fname, std::move(entry));
-    ret_elem = &new_it->second;
-    ++p.size;
-    if (p.size > p.capacity) EvictHandles(p);
+  // Opening a file handle requires talking to the NameNode, so construct
+  // the file handle without holding the lock to reduce contention.
+  *cache_hit = false;
+  // Create a new file handle
+  CachedHdfsFileHandle* new_fh = new CachedHdfsFileHandle(fs, fname->data(), 
mtime);
+  if (!new_fh->ok()) {
+    delete new_fh;
+    return nullptr;
   }
 
-  DCHECK(ret_elem->fh.get() != nullptr);
-  DCHECK(!ret_elem->in_use);
-  ret_elem->in_use = true;
-  return ret_elem->fh.get();
+  // Get the lock and create/move the new entry into the map
+  // This entry is new and will be immediately used. Place it as the first 
entry
+  // for this file in the multimap. The ordering is largely unimportant if all 
the
+  // existing entries are in use. However, if require_new_handle is true, 
there may be
+  // unused entries, so it would make more sense to insert the new entry at 
the front.
+  boost::lock_guard<SpinLock> g(p.lock);
+  pair<typename MapType::iterator, typename MapType::iterator> range =
+      p.cache.equal_range(*fname);
+  FileHandleEntry entry(new_fh, p.lru_list);
+  typename MapType::iterator new_it = p.cache.emplace_hint(range.first,
+      *fname, std::move(entry));
+  ++p.size;
+  if (p.size > p.capacity) EvictHandles(p);
+  FileHandleEntry* new_elem = &new_it->second;
+  DCHECK(!new_elem->in_use);
+  new_elem->in_use = true;
+  return new_elem->fh.get();
 }
 
 void FileHandleCache::ReleaseFileHandle(std::string* fname,

Reply via email to