# HG changeset patch
# User Jun Wu <qu...@fb.com>
# Date 1491597357 25200
#      Fri Apr 07 13:35:57 2017 -0700
# Node ID 48542079e2580e3889eb396c60acee9e1ce36d59
# Parent  0613dc70b604ad9a2abb548e223981defe753c7f
# Available At https://bitbucket.org/quark-zju/hg-draft
#              hg pull https://bitbucket.org/quark-zju/hg-draft -r 48542079e258
filelog: use slow path to calculate size

Previously the code tries to be smart, and use "rawsize" (= "size" in
filelog's parent class revlog) as a fast path, if there is no rename. But
that does not actually work - testing if it's a rename calls the slow path
already. Besides, testing rename is also insufficient - it's really the
metadata header that should be tested, not rename.

This patch uses slow path in all cases. Since the actual slow read is not
avoidable, it should not hurt performance.

This corrects "hg status" output when flag processor is involved. Related
methods are:

  basectx.status -> workingctx._buildstatus -> workingctx._dirstatestatus
  -> workingctx._checklookup -> filectx.cmp -> filelog.cmp

As a side effect, the "XXX" comment, "-4" hack are removed, the table in
"verify.py" is simplified, and test-filelog.py is fixed.

diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -821,8 +821,5 @@ class basefilectx(object):
 
         if (fctx._filenode is None
-            and (self._repo._encodefilterpats
-                 # if file data starts with '\1\n', empty metadata block is
-                 # prepended, which adds 4 bytes to filelog.size().
-                 or self.size() - 4 == fctx.size())
+            and self._repo._encodefilterpats
             or self.size() == fctx.size()):
             return self._filelog.cmp(self._filenode, fctx.data())
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -69,13 +69,15 @@ class filelog(revlog.revlog):
         """return the size of a given revision"""
 
-        # for revisions with renames, we have to go the slow way
-        node = self.node(rev)
-        if self.renamed(node):
-            return len(self.read(node))
         if self.iscensored(rev):
             return 0
 
-        # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
-        return super(filelog, self).size(rev)
+        # "rawsize" is faster. but rawsize could be wrong when:
+        # 1. content starts with "\1\n" (metadata header). usually it's a
+        #    rename. rare cases it's a binary file with that header
+        # 2. flag processor is involved, so the content can be changed
+        # checking the metadata header requires reading the content already.
+        # so we cannot avoid reading the content anyway, "rawsize" fastpath
+        # is not really useful.
+        return len(self.read(self.node(rev)))
 
     def cmp(self, node, text):
diff --git a/mercurial/verify.py b/mercurial/verify.py
--- a/mercurial/verify.py
+++ b/mercurial/verify.py
@@ -413,13 +413,10 @@ class verifier(object):
                 # -------------------------------------------------
                 #    rawsize() | L1      | L1     | L1    | L1
-                #       size() | L1      | L2-LM  | L1(*) | L1 (?)
+                #       size() | L1      | L2-LM  | L2-LM | L3-LM
                 # len(rawtext) | L2      | L2     | L2    | L2
                 #    len(text) | L2      | L2     | L2    | L3
-                #  len(read()) | L2      | L2-LM  | L2-LM | L3 (?)
+                #  len(read()) | L2      | L2-LM  | L2-LM | L3-LM
                 #
                 # LM:  length of metadata, depending on rawtext
-                # (*): not ideal, see comment in filelog.size
-                # (?): could be "- len(meta)" if the resolved content has
-                #      rename metadata
                 #
                 # Checks needed to be done:
diff --git a/tests/test-filelog.py.out b/tests/test-filelog.py.out
--- a/tests/test-filelog.py.out
+++ b/tests/test-filelog.py.out
@@ -1,2 +1,1 @@
-ERROR: FIXME: This is a known failure of filelog.size for data starting with 
\1\n
 OK.
diff --git a/tests/test-flagprocessor.t b/tests/test-flagprocessor.t
--- a/tests/test-flagprocessor.t
+++ b/tests/test-flagprocessor.t
@@ -245,4 +245,3 @@
 
   $ hg status
-  M base64
   $ hg diff
_______________________________________________
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Reply via email to