Repository: impala Updated Branches: refs/heads/master 8283081bf -> 0d7787fe4
IMPALA-6638: Reduce file handle cache lock contention FileHandleCache::OpenFileHandle() currently holds the lock while opening a file handle. This lengthens the duration holding the lock considerably, causing contention when a lot of file handles are being opened (i.e. when the cache is cold). This changes FileHandleCache::OpenFileHandle() drops the lock while opening the file handle, then reacquires it to add the file handle to the cache. When running a simple select on a table with 46801 Parquet files, this fixes a small performance overhead for the cold cache case compared to having the cache off. All results are averaged over 5 runs: File handle cache off: 7.19s File handle cache cold (no patch): 7.92s File handle cache cold (with patch): 7.20s When running a select that accesses and aggregates 4 columns from the same Parquet table (thus requiring more scan ranges for the same file), the fix reduces contention, particularly for the case with 8 IO threads per disk: 1 IO thread per disk: Without patch: 9.59s With patch: 8.11s 8 IO threads per disk: Without patch: 14.2s With patch: 8.17s The patch has no impact on the performance when the cache is hot. Change-Id: I4c695b21ca556e9c73c703c0c891e64939271c8d Reviewed-on: http://gerrit.cloudera.org:8080/9576 Reviewed-by: Joe McDonnell <joemcdonn...@cloudera.com> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/impala/repo Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3839826e Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3839826e Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3839826e Branch: refs/heads/master Commit: 3839826e69e9dff2062464f7eb1bdfed66ca7702 Parents: 8283081 Author: Joe McDonnell <joemcdonn...@cloudera.com> Authored: Sat Mar 10 15:09:58 2018 -0800 Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org> Committed: Tue Mar 13 02:40:07 2018 +0000 ---------------------------------------------------------------------- be/src/runtime/io/handle-cache.inline.h | 61 +++++++++++++++++----------- 1 file changed, 37 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/impala/blob/3839826e/be/src/runtime/io/handle-cache.inline.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/io/handle-cache.inline.h b/be/src/runtime/io/handle-cache.inline.h index f0cced0..99b106c 100644 --- a/be/src/runtime/io/handle-cache.inline.h +++ b/be/src/runtime/io/handle-cache.inline.h @@ -86,16 +86,22 @@ CachedHdfsFileHandle* FileHandleCache::GetFileHandle( // Hash the key and get appropriate partition int index = HashUtil::Hash(fname->data(), fname->size(), 0) % cache_partitions_.size(); FileHandleCachePartition& p = cache_partitions_[index]; - boost::lock_guard<SpinLock> g(p.lock); - pair<typename MapType::iterator, typename MapType::iterator> range = - p.cache.equal_range(*fname); // If this requires a new handle, skip to the creation codepath. Otherwise, // find an unused entry with the same mtime - FileHandleEntry* ret_elem = nullptr; if (!require_new_handle) { + boost::lock_guard<SpinLock> g(p.lock); + pair<typename MapType::iterator, typename MapType::iterator> range = + p.cache.equal_range(*fname); + + // When picking a cached entry, always follow the ordering of the map and + // pick earlier entries first. This allows excessive entries for a file + // to age out. For example, if there are three entries for a file and only + // one is used at a time, only the first will be used and the other two + // can age out. while (range.first != range.second) { FileHandleEntry* elem = &range.first->second; + DCHECK(elem->fh.get() != nullptr); if (!elem->in_use && elem->fh->mtime() == mtime) { // This element is currently in the lru_list, which means that lru_entry must // be an iterator pointing into the lru_list. @@ -104,35 +110,42 @@ CachedHdfsFileHandle* FileHandleCache::GetFileHandle( // the lru_list by resetting its iterator to point to the end of the list. p.lru_list.erase(elem->lru_entry); elem->lru_entry = p.lru_list.end(); - ret_elem = elem; *cache_hit = true; - break; + elem->in_use = true; + return elem->fh.get(); } ++range.first; } } // There was no entry that was free or caller asked for a new handle - if (!ret_elem) { - *cache_hit = false; - // Create a new entry and move it into the map - CachedHdfsFileHandle* new_fh = new CachedHdfsFileHandle(fs, fname->data(), mtime); - if (!new_fh->ok()) { - delete new_fh; - return nullptr; - } - FileHandleEntry entry(new_fh, p.lru_list); - typename MapType::iterator new_it = p.cache.emplace_hint(range.second, - *fname, std::move(entry)); - ret_elem = &new_it->second; - ++p.size; - if (p.size > p.capacity) EvictHandles(p); + // Opening a file handle requires talking to the NameNode, so construct + // the file handle without holding the lock to reduce contention. + *cache_hit = false; + // Create a new file handle + CachedHdfsFileHandle* new_fh = new CachedHdfsFileHandle(fs, fname->data(), mtime); + if (!new_fh->ok()) { + delete new_fh; + return nullptr; } - DCHECK(ret_elem->fh.get() != nullptr); - DCHECK(!ret_elem->in_use); - ret_elem->in_use = true; - return ret_elem->fh.get(); + // Get the lock and create/move the new entry into the map + // This entry is new and will be immediately used. Place it as the first entry + // for this file in the multimap. The ordering is largely unimportant if all the + // existing entries are in use. However, if require_new_handle is true, there may be + // unused entries, so it would make more sense to insert the new entry at the front. + boost::lock_guard<SpinLock> g(p.lock); + pair<typename MapType::iterator, typename MapType::iterator> range = + p.cache.equal_range(*fname); + FileHandleEntry entry(new_fh, p.lru_list); + typename MapType::iterator new_it = p.cache.emplace_hint(range.first, + *fname, std::move(entry)); + ++p.size; + if (p.size > p.capacity) EvictHandles(p); + FileHandleEntry* new_elem = &new_it->second; + DCHECK(!new_elem->in_use); + new_elem->in_use = true; + return new_elem->fh.get(); } void FileHandleCache::ReleaseFileHandle(std::string* fname,