Re: [PATCH 7 of 7] context: floor adjustlinkrev graph walk during copy tracing

2018-09-10 Thread Martin von Zweigbergk via Mercurial-devel
On Fri, Sep 7, 2018 at 8:14 AM Boris Feld  wrote:

> # HG changeset patch
> # User Boris Feld 
> # Date 1536255188 14400
> #  Thu Sep 06 13:33:08 2018 -0400
> # Node ID 0f720a4aa166d08daaa39e8462f2291f77e825c1
> # Parent  441c39342d63c75ee101587b2fbf3af60800762f
> # EXP-Topic copy-perf
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r
> 0f720a4aa166
> context: floor adjustlinkrev graph walk during copy tracing
>
> The `_adjustlinkrev` method gains an optional "floor" argument. The linkrev
> adjustment will give up once this floor is reached. The relevant functions
> using `_adjustlinkrev` are updated to pass an appropriate value in the copy
> tracing code.
>
> In some private repository, about 10% of the status call triggered
> pathological case addressed by this change. The speedup varies from one
> call
> to another, the best-observed win is moving from 170s to 11s.
>
> diff --git a/mercurial/context.py b/mercurial/context.py
> --- a/mercurial/context.py
> +++ b/mercurial/context.py
> @@ -633,7 +633,7 @@ class basefilectx(object):
>  def _changeid(self):
>  return self._findchangeid()
>
> -def _findchangeid(self):
> +def _findchangeid(self, floor=None):
>

Could we call it "stoprev" like we do in revlog.ancestors() and a few other
places (assuming it's conceptually the same thing)?

 if r'_changeid' in self.__dict__:
>  changeid = self._changeid
>  elif r'_changectx' in self.__dict__:
> @@ -641,10 +641,11 @@ class basefilectx(object):
>  elif r'_descendantrev' in self.__dict__:
>  # this file context was created from a revision with a known
>  # descendant, we can (lazily) correct for linkrev aliases
> -changeid = self._adjustlinkrev(self._descendantrev)
> +changeid = self._adjustlinkrev(self._descendantrev,
> floor=floor)
>  else:
>  changeid = self._filelog.linkrev(self._filerev)
> -self._changeid = changeid
> +if changeid is not None:
> +self._changeid = changeid
>  return changeid
>
>  @propertycache
> @@ -788,7 +789,7 @@ class basefilectx(object):
>
>  return True
>
> -def _adjustlinkrev(self, srcrev, inclusive=False):
> +def _adjustlinkrev(self, srcrev, inclusive=False, floor=None):
>  """return the first ancestor of  introducing 
>
>  If the linkrev of the file revision does not point to an ancestor
> of
> @@ -797,6 +798,10 @@ class basefilectx(object):
>
>  :srcrev: the changeset revision we search ancestors from
>  :inclusive: if true, the src revision will also be checked
> +:floor: an optional revision to stop the walk at. If no
> introduction
> +of this file content could be found before this floor
> +revision, the function will returns "None" and stops its
> +iteration.
>  """
>  repo = self._repo
>  cl = repo.unfiltered().changelog
> @@ -822,6 +827,8 @@ class basefilectx(object):
>  fnode = self._filenode
>  path = self._path
>  for a in iteranc:
> +if floor is not None and a < floor:
> +return None
>  ac = cl.read(a) # get changeset data (we avoid object
> creation)
>  if path in ac[3]: # checking the 'files' field.
>  # The file has been touched, check if the content is
> @@ -837,8 +844,12 @@ class basefilectx(object):
>  def isintroducedafter(self, changelogrev):
>  """True if a filectx have been introduced after a given floor
> revision
>  """
> -return (changelogrev <= self.linkrev()
> -or changelogrev <= self._introrev())
> +if changelogrev <= self.linkrev():
> +return True
> +introrev = self._introrev(floor=changelogrev)
> +if introrev is None:
> +return False
> +return changelogrev <= introrev
>
>  def _lazyrev(self):
>  """return self.rev() if it is available without computation,
> @@ -865,19 +876,28 @@ class basefilectx(object):
>  revision is one of its ancestors. This prevents bugs from
>  'linkrev-shadowing' when a file revision is used by multiple
>  changesets.
> +
>

Seems accidental and should be dropped?


>  """
>  return self._introrev()
>
> -def _introrev(self):
> +def _introrev(self, floor=None):
> +"""
> +Same as `introrev` but, with an extra argument to limit changelog
> +iteration range in some internal usecase.
> +
> +If `floor` is set, the `introrev` will not be searched past that
> +`floor` revision and "None" might be returned. This is useful to
> limit
> +iteration range.
> +"""
>  lkr = self.linkrev()
>  lazyrev = self._lazyrev()
>  

[PATCH 7 of 7] context: floor adjustlinkrev graph walk during copy tracing

2018-09-07 Thread Boris Feld
# HG changeset patch
# User Boris Feld 
# Date 1536255188 14400
#  Thu Sep 06 13:33:08 2018 -0400
# Node ID 0f720a4aa166d08daaa39e8462f2291f77e825c1
# Parent  441c39342d63c75ee101587b2fbf3af60800762f
# EXP-Topic copy-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#  hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
0f720a4aa166
context: floor adjustlinkrev graph walk during copy tracing

The `_adjustlinkrev` method gains an optional "floor" argument. The linkrev
adjustment will give up once this floor is reached. The relevant functions
using `_adjustlinkrev` are updated to pass an appropriate value in the copy
tracing code.

In some private repository, about 10% of the status call triggered
pathological case addressed by this change. The speedup varies from one call
to another, the best-observed win is moving from 170s to 11s.

diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -633,7 +633,7 @@ class basefilectx(object):
 def _changeid(self):
 return self._findchangeid()
 
-def _findchangeid(self):
+def _findchangeid(self, floor=None):
 if r'_changeid' in self.__dict__:
 changeid = self._changeid
 elif r'_changectx' in self.__dict__:
@@ -641,10 +641,11 @@ class basefilectx(object):
 elif r'_descendantrev' in self.__dict__:
 # this file context was created from a revision with a known
 # descendant, we can (lazily) correct for linkrev aliases
-changeid = self._adjustlinkrev(self._descendantrev)
+changeid = self._adjustlinkrev(self._descendantrev, floor=floor)
 else:
 changeid = self._filelog.linkrev(self._filerev)
-self._changeid = changeid
+if changeid is not None:
+self._changeid = changeid
 return changeid
 
 @propertycache
@@ -788,7 +789,7 @@ class basefilectx(object):
 
 return True
 
-def _adjustlinkrev(self, srcrev, inclusive=False):
+def _adjustlinkrev(self, srcrev, inclusive=False, floor=None):
 """return the first ancestor of  introducing 
 
 If the linkrev of the file revision does not point to an ancestor of
@@ -797,6 +798,10 @@ class basefilectx(object):
 
 :srcrev: the changeset revision we search ancestors from
 :inclusive: if true, the src revision will also be checked
+:floor: an optional revision to stop the walk at. If no introduction
+of this file content could be found before this floor
+revision, the function will returns "None" and stops its
+iteration.
 """
 repo = self._repo
 cl = repo.unfiltered().changelog
@@ -822,6 +827,8 @@ class basefilectx(object):
 fnode = self._filenode
 path = self._path
 for a in iteranc:
+if floor is not None and a < floor:
+return None
 ac = cl.read(a) # get changeset data (we avoid object creation)
 if path in ac[3]: # checking the 'files' field.
 # The file has been touched, check if the content is
@@ -837,8 +844,12 @@ class basefilectx(object):
 def isintroducedafter(self, changelogrev):
 """True if a filectx have been introduced after a given floor revision
 """
-return (changelogrev <= self.linkrev()
-or changelogrev <= self._introrev())
+if changelogrev <= self.linkrev():
+return True
+introrev = self._introrev(floor=changelogrev)
+if introrev is None:
+return False
+return changelogrev <= introrev
 
 def _lazyrev(self):
 """return self.rev() if it is available without computation,
@@ -865,19 +876,28 @@ class basefilectx(object):
 revision is one of its ancestors. This prevents bugs from
 'linkrev-shadowing' when a file revision is used by multiple
 changesets.
+
 """
 return self._introrev()
 
-def _introrev(self):
+def _introrev(self, floor=None):
+"""
+Same as `introrev` but, with an extra argument to limit changelog
+iteration range in some internal usecase.
+
+If `floor` is set, the `introrev` will not be searched past that
+`floor` revision and "None" might be returned. This is useful to limit
+iteration range.
+"""
 lkr = self.linkrev()
 lazyrev = self._lazyrev()
 if lazyrev is not None:
 if lazyrev == lkr:
 return lazyrev
 else:
-return self._adjustlinkrev(lazyrev, inclusive=True)
+return self._adjustlinkrev(lazyrev, inclusive=True, 
floor=floor)
 else:
-return self._findchangeid()
+return self._findchangeid(floor=floor)
 
 def introfilectx(self):
 """Return filectx having