On 10/16/2016 10:35 PM, Gregory Szorc wrote:
# HG changeset patch
# User Gregory Szorc <gregory.sz...@gmail.com>
# Date 1476649322 25200
#      Sun Oct 16 13:22:02 2016 -0700
# Node ID 735adf139447190fe52c65ec6e9b4e5e0c81d6aa
# Parent  757d25d2bbe63fc560e92b6bb25fbfbf09b09342
revlog: add a context manager to allow file handle reuse

For your information, given the proximity of the freeze (tomorrow,
November 18th), this is unlikely to get attention in time. If we do not get to this, please resend when 4.0 is out on November first,

For details see:

Currently, read-only operations traversing revlogs must open a new
file handle whenever they need uncached data from the underlying
revlog. This can add overhead to operations such as changegroup

The revlog APIs have a mechanism for reusing a file descriptor
for I/O. This was added by me a while ago as a means to speed up
revlog.addgroup(). At that time, I didn't do any work around
reusing file handles for read operations.

This patch introduces a context manager to cache an open file handle
on a revlog. When the context manager is active, revlog reads will
be routed to the opened file handle instead of opening a single
use file handle.

There is definitely room to improve the API. We could probably
even refactor the write file descriptor caching to use a context
manager - possibly the same one! However, this is a bit of work
since the write file handle can be swapped out if a revlog
transitions from inline to non-inline in the course of adding

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -9,16 +9,17 @@

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.

 from __future__ import absolute_import

 import collections
+import contextlib
 import errno
 import hashlib
 import os
 import struct
 import zlib

 # import stuff from node for others to import from revlog
 from .node import (
@@ -309,16 +310,18 @@ class revlog(object):
         self.index, nodemap, self._chunkcache = d
         if nodemap is not None:
             self.nodemap = self._nodecache = nodemap
         if not self._chunkcache:
         # revnum -> (chain-length, sum-delta-length)
         self._chaininfocache = {}

+        self._readfh = None
     def tip(self):
         return self.node(len(self.index) - 2)
     def __contains__(self, rev):
         return 0 <= rev < len(self)
     def __len__(self):
         return len(self.index) - 1
     def __iter__(self):
         return iter(xrange(len(self)))
@@ -1013,27 +1016,57 @@ class revlog(object):
         o, d = self._chunkcache
         # try to add to existing cache
         if o + len(d) == offset and len(d) + len(data) < _chunksize:
             self._chunkcache = o, d + data
             self._chunkcache = offset, data

+    @contextlib.contextmanager
+    def cachefilehandle(self):
+        """Maintain a persistent file handle during operations.
+        When this context manager is active, a file descriptor will be reused
+        for all read operations, ensuring the underlying revlog file isn't
+        reopened multiple times.
+        """
+        if self._readfh:
+            raise error.Abort('cachefilehandle already active')
+        # Inline revlogs have data chunks cached at open time. If we opened
+        # a file handle it wouldn't be read.
+        if self._inline:
+            yield
+            return
+        try:
+            self._readfh = self.opener(self.datafile)
+            yield
+        finally:
+            self._readfh.close()
+            self._readfh = None
     def _loadchunk(self, offset, length, df=None):
         """Load a segment of raw data from the revlog.

         Accepts an absolute offset, length to read, and an optional existing
         file handle to read from.

         If an existing file handle is passed, it will be seeked and the
         original seek position will NOT be restored.

         Returns a str or buffer of raw byte data.
+        # Use cached file handle from context manager automatically.
+        # self._readfh is always opened in read-only mode. df may be opened
+        # in append mode. The context manager should only be active during
+        # read operations. So this mismatch is OK.
+        df = df or self._readfh
         if df is not None:
             closehandle = False
             if self._inline:
                 df = self.opener(self.indexfile)
                 df = self.opener(self.datafile)
             closehandle = True
Mercurial-devel mailing list

Pierre-Yves David
Mercurial-devel mailing list

Reply via email to