D9745: revlog: use size_t for nodetree capacity

2021-01-12 Thread quark (Jun Wu)
quark created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  This allows handling revlogs containing more than 33554432 (INT_MAX /
  sizeof(nodetreenode)) revisions on x64 platforms.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D9745

AFFECTED FILES
  mercurial/cext/revlog.c

CHANGE DETAILS

diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -13,6 +13,7 @@
 #include <ctype.h>
 #include <limits.h>
 #include <stddef.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -55,10 +56,10 @@
indexObject *index;
nodetreenode *nodes;
Py_ssize_t nodelen;
-   unsigned length;   /* # nodes in use */
-   unsigned capacity; /* # nodes allocated */
-   int depth; /* maximum depth of tree */
-   int splits;/* # splits performed */
+   size_t length;   /* # nodes in use */
+   size_t capacity; /* # nodes allocated */
+   int depth;   /* maximum depth of tree */
+   int splits;  /* # splits performed */
 } nodetree;
 
 typedef struct {
@@ -1536,10 +1537,10 @@
 static int nt_new(nodetree *self)
 {
if (self->length == self->capacity) {
-   unsigned newcapacity;
+   size_t newcapacity;
nodetreenode *newnodes;
newcapacity = self->capacity * 2;
-   if (newcapacity >= INT_MAX / sizeof(nodetreenode)) {
+   if (newcapacity >= SIZE_MAX / sizeof(nodetreenode)) {
PyErr_SetString(PyExc_MemoryError,
"overflow in nt_new");
return -1;
@@ -1643,7 +1644,7 @@
self->nodelen = index->nodelen;
self->depth = 0;
self->splits = 0;
-   if ((size_t)self->capacity > INT_MAX / sizeof(nodetreenode)) {
+   if (self->capacity > SIZE_MAX / sizeof(nodetreenode)) {
PyErr_SetString(PyExc_ValueError, "overflow in init_nt");
return -1;
}



To: quark, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D9059: procutil: assign stdio objects if they are None

2020-09-18 Thread quark (Jun Wu)
quark created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  On Python 3 stdio objects can be None. That causes crashes. Fix it by opening
  devnull automatically.
  
  `stdin` can be `None` by using `0<&-` in bash, or spawning processes less
  carefully, for example, watchman used to cause such `None` stdin [1] (note:
  None is only observable on Python 3).
  
  [1]: 
https://github.com/facebook/watchman/commit/d241978aaa6b6d7c5b7260bc9e6d699d3a1cea53

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D9059

AFFECTED FILES
  mercurial/utils/procutil.py
  tests/test-stdio-missing.t

CHANGE DETAILS

diff --git a/tests/test-stdio-missing.t b/tests/test-stdio-missing.t
new file mode 100644
--- /dev/null
+++ b/tests/test-stdio-missing.t
@@ -0,0 +1,13 @@
+  $ cat > prompt.py << 'EOF'
+  > from mercurial import exthelper
+  > eh = exthelper.exthelper()
+  > cmdtable = eh.cmdtable
+  > @eh.command(b'prompt', [], norepo=True)
+  > def prompt(ui):
+  > chosen = ui.promptchoice(b"is stdin present? (y/N) $$  $$ ", 
default=1)
+  > ui.write(b"chosen: %d\n" % chosen)
+  > EOF
+
+  $ hg --config extensions.prompt=prompt.py prompt 0<&-
+  is stdin present? (y/N)  n
+  chosen: 1
diff --git a/mercurial/utils/procutil.py b/mercurial/utils/procutil.py
--- a/mercurial/utils/procutil.py
+++ b/mercurial/utils/procutil.py
@@ -114,6 +114,11 @@
 
 
 if pycompat.ispy3:
+# Stdio objects can be 'None' on Python 3. Most code paths (for example,
+# dispatch.initstdio) do not expect that. Fix them by opening devnull.
+sys.stdin = sys.stdin or open(os.devnull, "r")
+sys.stdout = sys.stdout or open(os.devnull, "w")
+sys.stderr = sys.stderr or sys.stdout
 # Python 3 implements its own I/O streams.
 # TODO: .buffer might not exist if std streams were replaced; we'll need
 # a silly wrapper to make a bytes stream backed by a unicode one.



To: quark, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D9058: chg: fallback to original hg if stdio fds are missing

2020-09-18 Thread quark (Jun Wu)
quark created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  If stdio fds are missing (e.g. fd 0 is not present), chg might open
  fds that take the number 0, and attachio would send the wrong fds
  to the client, which might cause unwanted behaviors. Avoid that by
  detecting the missing fds and falling back to the original hg.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D9058

AFFECTED FILES
  contrib/chg/chg.c
  tests/test-chg.t

CHANGE DETAILS

diff --git a/tests/test-chg.t b/tests/test-chg.t
--- a/tests/test-chg.t
+++ b/tests/test-chg.t
@@ -197,6 +197,14 @@
 
   $ cd ..
 
+missing stdio
+-
+
+  $ CHGDEBUG=1 chg version -q 0<&-
+  chg: debug: * stdio fds are missing (glob)
+  chg: debug: * execute original hg (glob)
+  Mercurial Distributed SCM * (glob)
+
 server lifecycle
 
 
diff --git a/contrib/chg/chg.c b/contrib/chg/chg.c
--- a/contrib/chg/chg.c
+++ b/contrib/chg/chg.c
@@ -373,8 +373,15 @@
 }
 
 /*
- * Test whether the command is unsupported or not. This is not designed to
- * cover all cases. But it's fast, does not depend on the server.
+ * Test whether the command and the environment is unsupported or not.
+ *
+ * If any of the stdio file descriptors are not present (rare, but some tools
+ * might spawn new processes without stdio instead of redirecting them to the
+ * null device), then mark it as not supported because attachio won't work
+ * correctly.
+ *
+ * The command list is not designed to cover all cases. But it's fast, and does
+ * not depend on the server.
  */
 static int isunsupported(int argc, const char *argv[])
 {
@@ -384,6 +391,13 @@
};
unsigned int state = 0;
int i;
+   /* use fcntl to test missing stdio fds */
+   if (fcntl(STDIN_FILENO, F_GETFD) == -1 ||
+   fcntl(STDOUT_FILENO, F_GETFD) == -1 ||
+   fcntl(STDERR_FILENO, F_GETFD) == -1) {
+   debugmsg("stdio fds are missing");
+   return 1;
+   }
for (i = 0; i < argc; ++i) {
if (strcmp(argv[i], "--") == 0)
break;



To: quark, #hg-reviewers
Cc: mercurial-patches, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7295: pytype: add a (very slow) test that executes pytype

2020-01-31 Thread quark (Jun Wu)
quark added a comment.


  > No, initially because I'd never heard of it, but now because I don't want 
to deal with ocaml. :)
  
  The `pip install` version ships with a pre-compiled binary so an ocaml 
compiler is not needed.
  
  > I'm dubious that it's doing anywhere near the checking of pytype if it 
manages to run to completion without errors. What does the output look like?
  
  Good question. I wasn't aware of the differences: I read pytype's 
README, and the examples apply to pyre. Pytype does type inference, while pyre 
only checks functions with stricter type annotations.
  
  Pyre does complete (output ). I wasn't able to get 
pytype past 44/251 (output ). For files with type 
annotations like `mail.py`, it seems pyre output is useful:
  
# on commit d844202324924919bc517691052d39c520e077eb
mercurial/mail.py:57:4 Inconsistent override [15]: `mail.STARTTLS.starttls` 
overrides method defined in `smtplib.SMTP` inconsistently. The overriding 
method is not annotated but should return a subtype of `typing.Tuple[int, 
bytes]`.
mercurial/mail.py:387:22 Incompatible variable type [9]: charsets is 
declared to have type `List[str]` but is used as type `None`.
mercurial/mail.py:392:28 Incompatible parameter type [6]: Expected `bytes` 
for 2nd anonymous parameter to call `_encode` but got `Union[bytes, str]`.
mercurial/mail.py:394:33 Incompatible parameter type [6]: Expected `bytes` 
for 1st anonymous parameter to call `encoding.strfromlocal` but got 
`Union[bytes, str]`.
mercurial/mail.py:397:35 Incompatible variable type [9]: charsets is 
declared to have type `List[str]` but is used as type `None`.
mercurial/mail.py:399:4 Incompatible variable type [9]: addr is declared to 
have type `str` but is used as type `bytes`.
mercurial/mail.py:402:30 Incompatible parameter type [6]: Expected 
`typing.Optional[str]` for 1st anonymous parameter to call `str.split` but got 
`bytes`.
mercurial/mail.py:405:8 Incompatible variable type [9]: addr is declared to 
have type `str` but is used as type `bytes`.
mercurial/mail.py:414:63 Incompatible parameter type [6]: Expected `bytes` 
for 1st anonymous parameter to call `encoding.strfromlocal` but got `str`.
mercurial/mail.py:417:31 Incompatible variable type [9]: charsets is 
declared to have type `List[str]` but is used as type `None`.
mercurial/mail.py:426:30 Incompatible variable type [9]: charsets is 
declared to have type `List[str]` but is used as type `None`.
mercurial/mail.py:446:22 Incompatible variable type [9]: charsets is 
declared to have type `List[str]` but is used as type `None`.
mercurial/mail.py:499:52 Incompatible parameter type [6]: Expected 
`Union[email.header.Header, str]` for 1st anonymous parameter to call 
`email.header.decode_header` but got `Union[bytes, email.header.Header]`.
  
  It seems to me that a combined approach might be interesting - use pytype to 
generate `pyi` files and pyre for faster checking.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7295/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7295

To: durin42, #hg-reviewers, indygreg
Cc: quark, marmoute, mharbison72, dlax, indygreg, mjpieters, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7295: pytype: add a (very slow) test that executes pytype

2020-01-30 Thread quark (Jun Wu)
quark added a comment.


  Has `pyre` been considered? It seems `pyre` only takes 10 seconds to check 
all the `.py` files.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7295/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7295

To: durin42, #hg-reviewers, indygreg
Cc: quark, marmoute, mharbison72, dlax, indygreg, mjpieters, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D8039: chg: force-set LC_CTYPE on server start to actual value from the environment

2020-01-30 Thread quark (Jun Wu)
quark added a comment.


  What do you think about this approach:
  
  1. The server detects that LC_CTYPE is coerced.
  2. When handling the "validate" command, the server sends back "invalidate 
this server, and fallback to original hg" response.
  
  This makes chg/non-chg behave consistently with some startup overhead in 
mis-configured environment. The chg client can potentially print a warning to 
remind the user to fix their environment.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8039/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8039

To: spectral, #hg-reviewers
Cc: quark, yuja, mjpieters, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7812: examples: specify rustfmt nightly using a $() construct

2020-01-08 Thread quark (Jun Wu)
quark added a comment.


  You can use `rustfmt +nightly` if it's installed by rustup.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7812/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7812

To: durin42, #hg-reviewers
Cc: quark, gracinet, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7631: RFC: absorb: allowing committed changes to be absorbed into their ancestors

2019-12-13 Thread quark (Jun Wu)
quark added a comment.


  `--rev` seems ambiguous since there might be different kinds of revisions to 
specify - target and revisions to edit. Maybe something like `--source`, 
`--from`, `--target`?

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7631/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7631

To: rdamazio, #hg-reviewers
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7582: rust-configparser: run rustfmt

2019-12-07 Thread quark (Jun Wu)
quark added a comment.


  We actually ran rustfmt, but it's an older version (rustfmt 1.3.0-stable 
(d334502 2019-06-09)) and I verified the code still formats under that 
particular version of rustfmt with empty `rustfmt.toml`. In other words, 
rustfmt itself seems to change over time.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7582/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7582

To: indygreg, #hg-reviewers
Cc: quark, durin42, kevincox, mjpieters, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7575: hg-core: vendor Facebook's configparser crate

2019-12-07 Thread quark (Jun Wu)
quark added inline comments.

INLINE COMMENTS

> config.rs:72
> +///
> +/// If `path` is a directory, it is ignored.
> +/// If `path` is a file, it will be loaded directly.

You might want to revert D13875655 for the directory include feature, which has 
some test changes.

> generate_parser.py:103-107
> +// This file should really be just 3 lines:
> +//
> +// #[derive(Parser)]
> +// #[grammar = "spec.pest"]
> +// pub(crate) struct ConfigParser;

You might want to follow this and remove the generated code.

> hg.rs:141-144
> +// exitcodemask is blacklisted if exitcode is outside 
> HGPLAINEXCEPT.
> +if !plain_exceptions.contains("exitcode") {
> +ui_blacklist.insert("exitcodemask".into());
> +}

You might want to remove these lines.

> hg.rs:262-263
> +//
> +// Unlike Mercurial, don't convert the "$PAGER" environment variable
> +// to "pager.pager" config.
> +//

You might want to respect `$PAGER`. We ignored it to reduce support burden for 
mis-configuration.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7575/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7575

To: indygreg, #hg-reviewers
Cc: quark, durin42, kevincox, mjpieters, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7518: revlog: fix revset in reachableroots docstring

2019-11-26 Thread quark (Jun Wu)
Closed by commit rHG1a42f8451a92: revlog: fix revset in reachableroots 
docstring (authored by quark).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7518?vs=18390=18394

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7518/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7518

AFFECTED FILES
  mercurial/revlog.py

CHANGE DETAILS

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1279,7 +1279,7 @@
 return bool(self.reachableroots(a, [b], [a], includepath=False))
 
 def reachableroots(self, minroot, heads, roots, includepath=False):
-"""return (heads(::<roots> and <roots>::<heads>))
+"""return (heads(::(<roots> and <roots>::<heads>)))
 
 If includepath is True, return (<roots>::<heads>)."""
 try:



To: quark, indygreg, #hg-reviewers, pulkit
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7520: dateutil: correct default for Ymd in parsedate

2019-11-26 Thread quark (Jun Wu)
Closed by commit rHGaef7b91dba51: dateutil: correct default for Ymd in 
parsedate (authored by quark).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7520?vs=18392=18396

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7520/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7520

AFFECTED FILES
  mercurial/utils/dateutil.py

CHANGE DETAILS

diff --git a/mercurial/utils/dateutil.py b/mercurial/utils/dateutil.py
--- a/mercurial/utils/dateutil.py
+++ b/mercurial/utils/dateutil.py
@@ -209,6 +209,8 @@
 True
 >>> tz == strtz
 True
+>>> parsedate(b'2000 UTC', formats=extendeddateformats)
+(946684800, 0)
 """
 if bias is None:
 bias = {}
@@ -244,7 +246,8 @@
 if part[0:1] in b"HMS":
 b = b"00"
 else:
-b = b"0"
+# year, month, and day start from 1
+b = b"1"
 
 # this piece is for matching the generic end to today's date
 n = datestr(now, b"%" + part[0:1])



To: quark, #hg-reviewers, pulkit
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7519: test-doctest: include dateutil

2019-11-26 Thread quark (Jun Wu)
Closed by commit rHG92518ca66c76: test-doctest: include dateutil (authored by 
quark).
This revision was automatically updated to reflect the committed changes.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7519?vs=18391=18395

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7519/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7519

AFFECTED FILES
  tests/test-doctest.py

CHANGE DETAILS

diff --git a/tests/test-doctest.py b/tests/test-doctest.py
--- a/tests/test-doctest.py
+++ b/tests/test-doctest.py
@@ -82,6 +82,7 @@
 testmod('mercurial.url')
 testmod('mercurial.util')
 testmod('mercurial.util', testtarget='platform')
+testmod('mercurial.utils.dateutil')
 testmod('mercurial.utils.stringutil')
 testmod('hgext.convert.convcmd')
 testmod('hgext.convert.cvsps')



To: quark, #hg-reviewers, pulkit
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7520: dateutil: correct default for Ymd in parsedate

2019-11-25 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  The code uses `0` for the default value of Ymd (year, month, and day), which
  seems suboptimal. For example, these will fail to parse:
  
dateutil.parsedate('2000', formats=dateutil.extendeddateformats)
dateutil.parsedate('Jan 2000', formats=dateutil.extendeddateformats)
  
  Fix it by providing sane defaults (1 instead of 0) for year, month, and day.
  
  The suboptimal behavior was introduced by 91bc001a592 (2010-12-29,
  "date: fix matching of underspecified date ranges"), which does not seem to
  justify the current behavior.
  
  Note end-users should not notice the subtle issue, because there are no 
formats
  in `defaultdateformats` that allow an explicit year with omitted month, or an
  explicit month with omitted day.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7520

AFFECTED FILES
  mercurial/utils/dateutil.py

CHANGE DETAILS

diff --git a/mercurial/utils/dateutil.py b/mercurial/utils/dateutil.py
--- a/mercurial/utils/dateutil.py
+++ b/mercurial/utils/dateutil.py
@@ -209,6 +209,8 @@
 True
 >>> tz == strtz
 True
+>>> parsedate(b'2000 UTC', formats=extendeddateformats)
+(946684800, 0)
 """
 if bias is None:
 bias = {}
@@ -244,7 +246,8 @@
 if part[0:1] in b"HMS":
 b = b"00"
 else:
-b = b"0"
+# year, month, and day start from 1
+b = b"1"
 
 # this piece is for matching the generic end to today's date
 n = datestr(now, b"%" + part[0:1])



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7519: test-doctest: include dateutil

2019-11-25 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  `mercurial.utils.dateutil` has docstrings that contain doctests. Include them.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7519

AFFECTED FILES
  tests/test-doctest.py

CHANGE DETAILS

diff --git a/tests/test-doctest.py b/tests/test-doctest.py
--- a/tests/test-doctest.py
+++ b/tests/test-doctest.py
@@ -82,6 +82,7 @@
 testmod('mercurial.url')
 testmod('mercurial.util')
 testmod('mercurial.util', testtarget='platform')
+testmod('mercurial.utils.dateutil')
 testmod('mercurial.utils.stringutil')
 testmod('hgext.convert.convcmd')
 testmod('hgext.convert.cvsps')



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D7518: revlog: fix revset in reachableroots docstring

2019-11-25 Thread quark (Jun Wu)
quark created this revision.
Herald added a reviewer: indygreg.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  `reachableroots` will only return a subset of `roots` when `includepath` is
  False. For example, given the following linear DAG:
  
2
|
1
|
0
  
  Using roots=0+2, heads=1, the definition in the docstring does not match what
  `reachableroots` actually does:
  
ipdb> repo.changelog.reachableroots(0, roots=[0,2],heads=[1])
[0]
ipdb> repo.revs('heads(::(0+2) & (0+2)::1)')

  
  The fix is to do `heads & ::roots` (or `heads & heads::roots`) first, then
  select their ancestors:
  
ipdb> repo.revs('heads(::((0+2) & (0+2)::1))')

  
  The docstring was introduced by fd92bfbbe02d9 (2015-06-19 "revset: rename
  revsbetween to reachableroots and add an argument"), which introduced the
  `includepath=False` behavior for the graphlog grandparents use case. I believe
  the docstring instead of the code should be changed because changing the
  code to match the docstring can result in suboptimal graphlog like:
  
o
:\
: o
: :
:/
o
  
  As opposed to the current "linearized" graphlog:
  
o
|
o
:
o

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7518

AFFECTED FILES
  mercurial/revlog.py

CHANGE DETAILS

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1279,7 +1279,7 @@
 return bool(self.reachableroots(a, [b], [a], includepath=False))
 
 def reachableroots(self, minroot, heads, roots, includepath=False):
-"""return (heads(::<roots> and <roots>::<heads>))
+"""return (heads(::(<roots> and <roots>::<heads>)))
 
 If includepath is True, return (<roots>::<heads>)."""
 try:



To: quark, indygreg, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4464: drawdag: correctly pass repo to super constructor

2018-09-04 Thread quark (Jun Wu)
quark accepted this revision.
quark added a comment.


  LGTM

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4464

To: martinvonz, #hg-reviewers, quark
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4150: linelog: optimize replacelines

2018-08-09 Thread quark (Jun Wu)
quark added a comment.


  To clarify, I do think a stateless API is better. It can be done by keeping 
`_lastannotate` as a private cache inaccessible from other APIs, moving 
`annotateresult` to the return value of `annotate`, then adding `arev` to 
`replacelines` to verify the cache. The C code uses `brev` instead of `rev` as 
the parameter name for a reason.
  
  The original `annotateresult` was used in an attempt to reduce the amount of 
C code to be reviewed. Reviewing C code took too long.
  
  The choice of C was because I pursued performance, C is not bad for this 
particular work (ex. linelog.c has similar LOC), and C is also friendly for the 
Git community.
  
  To provide more context, mpm commented internally on how the linelog format 
might be improved. That is to add a `LINESPAN` instruction replacing a range of 
continuous LINE instructions. Again, not implemented because of C. But it seems 
much easier with Python now.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4150

To: quark, #hg-reviewers, durin42
Cc: durin42, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4148: perf: add a command to benchmark linelog edits

2018-08-09 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG1601afbb573c: perf: add a command to benchmark linelog 
edits (authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4148?vs=10065=10133

REVISION DETAIL
  https://phab.mercurial-scm.org/D4148

AFFECTED FILES
  contrib/perf.py
  tests/test-contrib-perf.t

CHANGE DETAILS

diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t
--- a/tests/test-contrib-perf.t
+++ b/tests/test-contrib-perf.t
@@ -82,6 +82,8 @@
  (no help text available)
perfheads (no help text available)
perfindex (no help text available)
+   perflinelogedits
+ (no help text available)
perfloadmarkers
  benchmark the time to parse the on-disk markers for a repo
perflog   (no help text available)
@@ -154,6 +156,7 @@
 #endif
   $ hg perfheads
   $ hg perfindex
+  $ hg perflinelogedits -n 1
   $ hg perfloadmarkers
   $ hg perflog
   $ hg perflookup 2
diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -889,6 +889,38 @@
 timer(lambda: len(repo.lookup(rev)))
 fm.end()
 
+@command('perflinelogedits',
+ [('n', 'edits', 1, 'number of edits'),
+  ('', 'max-hunk-lines', 10, 'max lines in a hunk'),
+ ], norepo=True)
+def perflinelogedits(ui, **opts):
+from mercurial import linelog
+
+edits = opts['edits']
+maxhunklines = opts['max_hunk_lines']
+
+maxb1 = 10
+random.seed(0)
+randint = random.randint
+currentlines = 0
+arglist = []
+for rev in xrange(edits):
+a1 = randint(0, currentlines)
+a2 = randint(a1, min(currentlines, a1 + maxhunklines))
+b1 = randint(0, maxb1)
+b2 = randint(b1, b1 + maxhunklines)
+currentlines += (b2 - b1) - (a2 - a1)
+arglist.append((rev, a1, a2, b1, b2))
+
+def d():
+ll = linelog.linelog()
+for args in arglist:
+ll.replacelines(*args)
+
+timer, fm = gettimer(ui, opts)
+timer(d)
+fm.end()
+
 @command('perfrevrange', formatteropts)
 def perfrevrange(ui, repo, *specs, **opts):
 timer, fm = gettimer(ui, opts)



To: quark, #hg-reviewers, durin42
Cc: durin42, indygreg, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4150: linelog: optimize replacelines

2018-08-09 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHGee97f7a677f3: linelog: optimize replacelines (authored by 
quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4150?vs=10067=10135

REVISION DETAIL
  https://phab.mercurial-scm.org/D4150

AFFECTED FILES
  mercurial/linelog.py

CHANGE DETAILS

diff --git a/mercurial/linelog.py b/mercurial/linelog.py
--- a/mercurial/linelog.py
+++ b/mercurial/linelog.py
@@ -313,17 +313,22 @@
 appendinst = self._program.append
 
 # insert
+blineinfos = []
+bappend = blineinfos.append
 if b1 < b2:
 # Determine the jump target for the JGE at the start of
 # the new block.
 tgt = oldproglen + (b2 - b1 + 1)
 # Jump to skip the insert if we're at an older revision.
 appendinst(_jl(rev, tgt))
 for linenum in pycompat.xrange(b1, b2):
 if _internal_blines is None:
+bappend(lineinfo(rev, linenum, programlen()))
 appendinst(_line(rev, linenum))
 else:
-appendinst(_line(*_internal_blines[linenum]))
+newrev, newlinenum = _internal_blines[linenum]
+bappend(lineinfo(newrev, newlinenum, programlen()))
+appendinst(_line(newrev, newlinenum))
 # delete
 if a1 < a2:
 if a2 > len(ar.lines):
@@ -342,19 +347,26 @@
 endaddr = ar.lines[a2 - 1]._offset + 1
 appendinst(_jge(rev, endaddr))
 # copy instruction from a1
+a1instpc = programlen()
 appendinst(a1inst)
 # if a1inst isn't a jump or EOF, then we need to add an unconditional
 # jump back into the program here.
 if not isinstance(a1inst, (_jump, _eof)):
 appendinst(_jump(0, a1info._offset + 1))
 # Patch instruction at a1, which makes our patch live.
 self._program[a1info._offset] = _jump(0, oldproglen)
-# For compat with the C version, re-annotate rev so that
-# self.annotateresult is cromulent.. We could fix up the
-# annotateresult in place (which is how the C version works),
-# but for now we'll pass on that and see if it matters in
-# practice.
-self.annotate(max(self._lastannotate.rev, rev))
+
+# Update self._lastannotate in place. This serves as a cache to avoid
+# expensive "self.annotate" in this function, when "replacelines" is
+# used continuously.
+if len(self._lastannotate.lines) > a1:
+self._lastannotate.lines[a1]._offset = a1instpc
+else:
+assert isinstance(a1inst, _eof)
+self._lastannotate._eof = a1instpc
+self._lastannotate.lines[a1:a2] = blineinfos
+self._lastannotate.rev = max(self._lastannotate.rev, rev)
+
 if rev > self._maxrev:
 self._maxrev = rev
 



To: quark, #hg-reviewers, durin42
Cc: durin42, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4149: linelog: extract `len(self._program)` to a local function

2018-08-09 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG32b1967b8734: linelog: extract `len(self._program)` to a 
local function (authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4149?vs=10066=10134

REVISION DETAIL
  https://phab.mercurial-scm.org/D4149

AFFECTED FILES
  mercurial/linelog.py

CHANGE DETAILS

diff --git a/mercurial/linelog.py b/mercurial/linelog.py
--- a/mercurial/linelog.py
+++ b/mercurial/linelog.py
@@ -308,7 +308,8 @@
 else:
 a1info = ar.lines[a1]
 a1inst = self._program[a1info._offset]
-oldproglen = len(self._program)
+programlen = self._program.__len__
+oldproglen = programlen()
 appendinst = self._program.append
 
 # insert



To: quark, #hg-reviewers, indygreg
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4147: linelog: update internal help text

2018-08-09 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHGc10be3fc200b: linelog: update internal help text (authored 
by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4147?vs=10064=10132

REVISION DETAIL
  https://phab.mercurial-scm.org/D4147

AFFECTED FILES
  mercurial/help/internals/linelog.txt

CHANGE DETAILS

diff --git a/mercurial/help/internals/linelog.txt 
b/mercurial/help/internals/linelog.txt
--- a/mercurial/help/internals/linelog.txt
+++ b/mercurial/help/internals/linelog.txt
@@ -112,26 +112,49 @@
 1. Interleaved insertions, or interleaved deletions.
It can be rewritten to a non-interleaved tree structure.
 
-   ^AI/D x ^AI/D x
-   ^AI/D y  -> ^AI/D y
-   ^AE x   ^AE y
-   ^AE y   ^AE x
+   Take insertions as example, deletions are similar:
+
+   ^AI x ^AI x
+   a a
+   ^AI x + 1  -> ^AI x + 1
+   b b
+   ^AE x ^AE x + 1
+   c ^AE x
+   ^AE x + 1 ^AI x + 1
+ c
+ ^AE x + 1
 
 2. Nested insertions, where the inner one has a smaller revision number.
+   Or nested deletions, where the inner one has a larger revision number.
It can be rewritten to a non-nested form.
 
+   Take insertions as example, deletions are similar:
+
^AI x + 1 ^AI x + 1
+   a a
^AI x  -> ^AE x + 1
-   ^AE x ^AI x
-   ^AE x + 1 ^AE x
+   b ^AI x
+   ^AE x b
+   c ^AE x
+   ^AE x + 1 ^AI x + 1
+ c
+ ^AE x + 1
 
-3. Insertion or deletion inside another deletion, where the outer deletion
-   block has a smaller revision number.
+3. Insertion inside deletion with a smaller revision number.
+
+   Rewrite by duplicating the content inserted:
 
^AD x  ^AD x
-   ^AI/D x + 1 -> ^AE x
-   ^AE x + 1  ^AI/D x + 1
-   ^AE x  ^AE x
+   a  a
+   ^AI x + 1  ->  b
+   b  c
+   ^AE x + 1  ^AE x
+   c  ^AI x + 1
+   ^AE x  b
+  ^AE x + 1
+
+   Note: If "annotate" purely depends on "^AI" information, then the
+   duplication content will lose track of where "b" is originally from.
 
   Some of them may be valid in other implementations for special purposes. For
   example, to "revive" a previously deleted block in a newer revision.
@@ -249,3 +272,31 @@
 
   "c" makes "hg absorb" easier to implement and makes it possible to do
   "annotate --deleted".
+
+1.4 Malformed Cases Handling
+
+  The following "case 1", "case 2", and "case 3" refer to cases mentioned
+  in section 0.5.
+
+  Using the exposed API (replacelines), case 1 is impossible to generate,
+  although it's possible to generate it by constructing rawdata and load that
+  via linelog.fromdata.
+
+  Doing annotate(maxrev) before replacelines (aka. a1, a2 passed to
+  replacelines are related to the latest revision) eliminates the possibility
+  of case 3. That makes sense since usually you'd like to make edits on top of
+  the latest revision. Practically, both absorb and fastannotate do this.
+
+  Doing annotate(maxrev), plus replacelines(rev, ...) where rev >= maxrev
+  eliminates the possibility of case 2. That makes sense since usually the
+  edits belong to "new revisions", not "old revisions". Practically,
+  fastannotate does this. Absorb calls replacelines with rev < maxrev to edit
+  past revisions. So it needs some extra care to not generate case 2.
+
+  If case 1 occurs, that probably means linelog file corruption (assuming
+  linelog is edited via public APIs) the checkout or annotate result could
+  be less meaningful or even error out, but linelog wouldn't enter an infinite
+  loop.
+
+  If either case 2 or 3 occurs, linelog works as if the inner "^AI/D" and "^AE"
+  operations on the left side are silently ignored.



To: quark, #hg-reviewers
Cc: martinvonz, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4149: linelog: extract `len(self._program)` to a local function

2018-08-07 Thread quark (Jun Wu)
quark updated this revision to Diff 10066.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4149?vs=10026&id=10066

REVISION DETAIL
  https://phab.mercurial-scm.org/D4149

AFFECTED FILES
  mercurial/linelog.py

CHANGE DETAILS

diff --git a/mercurial/linelog.py b/mercurial/linelog.py
--- a/mercurial/linelog.py
+++ b/mercurial/linelog.py
@@ -308,7 +308,8 @@
 else:
 a1info = ar.lines[a1]
 a1inst = self._program[a1info._offset]
-oldproglen = len(self._program)
+programlen = self._program.__len__
+oldproglen = programlen()
 appendinst = self._program.append
 
 # insert



To: quark, #hg-reviewers, indygreg
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4150: linelog: optimize replacelines

2018-08-07 Thread quark (Jun Wu)
quark updated this revision to Diff 10067.
quark edited the summary of this revision.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4150?vs=10025&id=10067

REVISION DETAIL
  https://phab.mercurial-scm.org/D4150

AFFECTED FILES
  mercurial/linelog.py

CHANGE DETAILS

diff --git a/mercurial/linelog.py b/mercurial/linelog.py
--- a/mercurial/linelog.py
+++ b/mercurial/linelog.py
@@ -313,17 +313,22 @@
 appendinst = self._program.append
 
 # insert
+blineinfos = []
+bappend = blineinfos.append
 if b1 < b2:
 # Determine the jump target for the JGE at the start of
 # the new block.
 tgt = oldproglen + (b2 - b1 + 1)
 # Jump to skip the insert if we're at an older revision.
 appendinst(_jl(rev, tgt))
 for linenum in pycompat.xrange(b1, b2):
 if _internal_blines is None:
+bappend(lineinfo(rev, linenum, programlen()))
 appendinst(_line(rev, linenum))
 else:
-appendinst(_line(*_internal_blines[linenum]))
+newrev, newlinenum = _internal_blines[linenum]
+bappend(lineinfo(newrev, newlinenum, programlen()))
+appendinst(_line(newrev, newlinenum))
 # delete
 if a1 < a2:
 if a2 > len(ar.lines):
@@ -342,19 +347,26 @@
 endaddr = ar.lines[a2 - 1]._offset + 1
 appendinst(_jge(rev, endaddr))
 # copy instruction from a1
+a1instpc = programlen()
 appendinst(a1inst)
 # if a1inst isn't a jump or EOF, then we need to add an unconditional
 # jump back into the program here.
 if not isinstance(a1inst, (_jump, _eof)):
 appendinst(_jump(0, a1info._offset + 1))
 # Patch instruction at a1, which makes our patch live.
 self._program[a1info._offset] = _jump(0, oldproglen)
-# For compat with the C version, re-annotate rev so that
-# self.annotateresult is cromulent.. We could fix up the
-# annotateresult in place (which is how the C version works),
-# but for now we'll pass on that and see if it matters in
-# practice.
-self.annotate(max(self._lastannotate.rev, rev))
+
+# Update self._lastannotate in place. This serves as a cache to avoid
+# expensive "self.annotate" in this function, when "replacelines" is
+# used continuously.
+if len(self._lastannotate.lines) > a1:
+self._lastannotate.lines[a1]._offset = a1instpc
+else:
+assert isinstance(a1inst, _eof)
+self._lastannotate._eof = a1instpc
+self._lastannotate.lines[a1:a2] = blineinfos
+self._lastannotate.rev = max(self._lastannotate.rev, rev)
+
 if rev > self._maxrev:
 self._maxrev = rev
 



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4148: perf: add a command to benchmark linelog edits

2018-08-07 Thread quark (Jun Wu)
quark updated this revision to Diff 10065.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4148?vs=10061&id=10065

REVISION DETAIL
  https://phab.mercurial-scm.org/D4148

AFFECTED FILES
  contrib/perf.py
  tests/test-contrib-perf.t

CHANGE DETAILS

diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t
--- a/tests/test-contrib-perf.t
+++ b/tests/test-contrib-perf.t
@@ -82,6 +82,8 @@
  (no help text available)
perfheads (no help text available)
perfindex (no help text available)
+   perflinelogedits
+ (no help text available)
perfloadmarkers
  benchmark the time to parse the on-disk markers for a repo
perflog   (no help text available)
@@ -154,6 +156,7 @@
 #endif
   $ hg perfheads
   $ hg perfindex
+  $ hg perflinelogedits -n 1
   $ hg perfloadmarkers
   $ hg perflog
   $ hg perflookup 2
diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -889,6 +889,38 @@
 timer(lambda: len(repo.lookup(rev)))
 fm.end()
 
+@command('perflinelogedits',
+ [('n', 'edits', 1, 'number of edits'),
+  ('', 'max-hunk-lines', 10, 'max lines in a hunk'),
+ ], norepo=True)
+def perflinelogedits(ui, **opts):
+from mercurial import linelog
+
+edits = opts['edits']
+maxhunklines = opts['max_hunk_lines']
+
+maxb1 = 10
+random.seed(0)
+randint = random.randint
+currentlines = 0
+arglist = []
+for rev in xrange(edits):
+a1 = randint(0, currentlines)
+a2 = randint(a1, min(currentlines, a1 + maxhunklines))
+b1 = randint(0, maxb1)
+b2 = randint(b1, b1 + maxhunklines)
+currentlines += (b2 - b1) - (a2 - a1)
+arglist.append((rev, a1, a2, b1, b2))
+
+def d():
+ll = linelog.linelog()
+for args in arglist:
+ll.replacelines(*args)
+
+timer, fm = gettimer(ui, opts)
+timer(d)
+fm.end()
+
 @command('perfrevrange', formatteropts)
 def perfrevrange(ui, repo, *specs, **opts):
 timer, fm = gettimer(ui, opts)



To: quark, #hg-reviewers
Cc: indygreg, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4147: linelog: update internal help text

2018-08-07 Thread quark (Jun Wu)
quark updated this revision to Diff 10064.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4147?vs=10027&id=10064

REVISION DETAIL
  https://phab.mercurial-scm.org/D4147

AFFECTED FILES
  mercurial/help/internals/linelog.txt

CHANGE DETAILS

diff --git a/mercurial/help/internals/linelog.txt 
b/mercurial/help/internals/linelog.txt
--- a/mercurial/help/internals/linelog.txt
+++ b/mercurial/help/internals/linelog.txt
@@ -112,26 +112,49 @@
 1. Interleaved insertions, or interleaved deletions.
It can be rewritten to a non-interleaved tree structure.
 
-   ^AI/D x ^AI/D x
-   ^AI/D y  -> ^AI/D y
-   ^AE x   ^AE y
-   ^AE y   ^AE x
+   Take insertions as example, deletions are similar:
+
+   ^AI x ^AI x
+   a a
+   ^AI x + 1  -> ^AI x + 1
+   b b
+   ^AE x ^AE x + 1
+   c ^AE x
+   ^AE x + 1 ^AI x + 1
+ c
+ ^AE x + 1
 
 2. Nested insertions, where the inner one has a smaller revision number.
+   Or nested deletions, where the inner one has a larger revision number.
It can be rewritten to a non-nested form.
 
+   Take insertions as example, deletions are similar:
+
^AI x + 1 ^AI x + 1
+   a a
^AI x  -> ^AE x + 1
-   ^AE x ^AI x
-   ^AE x + 1 ^AE x
+   b ^AI x
+   ^AE x b
+   c ^AE x
+   ^AE x + 1 ^AI x + 1
+ c
+ ^AE x + 1
+
+3. Insertion inside deletion with a smaller revision number.
 
-3. Insertion or deletion inside another deletion, where the outer deletion
-   block has a smaller revision number.
+   Rewrite by duplicating the content inserted:
 
^AD x  ^AD x
-   ^AI/D x + 1 -> ^AE x
-   ^AE x + 1  ^AI/D x + 1
-   ^AE x  ^AE x
+   a  a
+   ^AI x + 1  ->  b
+   b  c
+   ^AE x + 1  ^AE x
+   c  ^AI x + 1
+   ^AE x  b
+  ^AE x + 1
+
+   Note: If "annotate" purely depends on "^AI" information, then the
+   duplication content will lose track of where "b" is originally from.
 
   Some of them may be valid in other implementations for special purposes. For
   example, to "revive" a previously deleted block in a newer revision.
@@ -249,3 +272,31 @@
 
   "c" makes "hg absorb" easier to implement and makes it possible to do
   "annotate --deleted".
+
+1.4 Malformed Cases Handling
+
+  The following "case 1", "case 2", and "case 3" refer to cases mentioned
+  in section 0.5.
+
+  Using the exposed API (replacelines), case 1 is impossible to generate,
+  although it's possible to generate it by constructing rawdata and load that
+  via linelog.fromdata.
+
+  Doing annotate(maxrev) before replacelines (aka. a1, a2 passed to
+  replacelines are related to the latest revision) eliminates the possibility
+  of case 3. That makes sense since usually you'd like to make edits on top of
+  the latest revision. Practically, both absorb and fastannotate do this.
+
+  Doing annotate(maxrev), plus replacelines(rev, ...) where rev >= maxrev
+  eliminates the possibility of case 2. That makes sense since usually the
+  edits belong to "new revisions", not "old revisions". Practically,
+  fastannotate does this. Absorb calls replacelines with rev < maxrev to edit
+  past revisions. So it needs some extra care to not generate case 2.
+
+  If case 1 occurs, that probably means linelog file corruption (assuming
+  linelog is edited via public APIs) the checkout or annotate result could
+  be less meaningful or even error out, but linelog wouldn't enter an infinite
+  loop.
+
+  If either case 2 or 3 occurs, linelog works as if the inner "^AI/D" and "^AE"
+  operations on the left side are silently ignored.



To: quark, #hg-reviewers
Cc: martinvonz, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4148: perf: add a command to benchmark linelog edits

2018-08-07 Thread quark (Jun Wu)
quark added inline comments.

INLINE COMMENTS

> indygreg wrote in perf.py:904-912
> I'm a bit concerned about the use of random data in a benchmark. Since every 
> run will be different, the benchmark may not be stable. And if the random 
> source is slow, we could be spending time waiting on entropy.
> 
> Do you think it is worthwhile to precompute the arguments to `replacelines()` 
> and have the benchmark simply iterate through a list of arguments and call 
> `replacelines()` repeatedly?

I think `random.seed(0)` makes it stable?

Good point about pre-calculating the randints. Will change.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4148

To: quark, #hg-reviewers
Cc: indygreg, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4148: perf: add a command to benchmark linelog edits

2018-08-07 Thread quark (Jun Wu)
quark updated this revision to Diff 10061.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4148?vs=10023&id=10061

REVISION DETAIL
  https://phab.mercurial-scm.org/D4148

AFFECTED FILES
  contrib/perf.py
  tests/test-contrib-perf.t

CHANGE DETAILS

diff --git a/tests/test-contrib-perf.t b/tests/test-contrib-perf.t
--- a/tests/test-contrib-perf.t
+++ b/tests/test-contrib-perf.t
@@ -82,6 +82,8 @@
  (no help text available)
perfheads (no help text available)
perfindex (no help text available)
+   perflinelogedits
+ (no help text available)
perfloadmarkers
  benchmark the time to parse the on-disk markers for a repo
perflog   (no help text available)
@@ -154,6 +156,7 @@
 #endif
   $ hg perfheads
   $ hg perfindex
+  $ hg perflinelogedits -n 1
   $ hg perfloadmarkers
   $ hg perflog
   $ hg perflookup 2
diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -889,6 +889,34 @@
 timer(lambda: len(repo.lookup(rev)))
 fm.end()
 
+@command('perflinelogedits',
+ [('n', 'edits', 1, 'number of edits'),
+  ('', 'max-hunk-lines', 10, 'max lines in a hunk'),
+ ], norepo=True)
+def perflinelogedits(ui, **opts):
+from mercurial import linelog
+
+edits = opts['edits']
+maxhunklines = opts['max_hunk_lines']
+
+def d():
+maxb1 = 10
+random.seed(0)
+randint = random.randint
+currentlines = 0
+ll = linelog.linelog()
+for rev in xrange(edits):
+a1 = randint(0, currentlines)
+a2 = randint(a1, min(currentlines, a1 + maxhunklines))
+b1 = randint(0, maxb1)
+b2 = randint(b1, b1 + maxhunklines)
+currentlines += (b2 - b1) - (a2 - a1)
+ll.replacelines(rev, a1, a2, b1, b2)
+
+timer, fm = gettimer(ui, opts)
+timer(d)
+fm.end()
+
 @command('perfrevrange', formatteropts)
 def perfrevrange(ui, repo, *specs, **opts):
 timer, fm = gettimer(ui, opts)



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4151: linelog: fix infinite loop vulnerability

2018-08-07 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG27a54096c92e: linelog: fix infinite loop vulnerability 
(authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4151?vs=10032&id=10046

REVISION DETAIL
  https://phab.mercurial-scm.org/D4151

AFFECTED FILES
  mercurial/linelog.py
  tests/test-linelog.py

CHANGE DETAILS

diff --git a/tests/test-linelog.py b/tests/test-linelog.py
--- a/tests/test-linelog.py
+++ b/tests/test-linelog.py
@@ -179,6 +179,15 @@
 ar = ll.annotate(rev)
 self.assertEqual([(l.rev, l.linenum) for l in ar], lines)
 
+def testinfinitebadprogram(self):
+ll = linelog.linelog.fromdata(
+b'\x00\x00\x00\x00\x00\x00\x00\x02'  # header
+b'\x00\x00\x00\x00\x00\x00\x00\x01'  # JUMP to self
+)
+with self.assertRaises(linelog.LineLogError):
+# should not be an infinite loop and raise
+ll.annotate(1)
+
 if __name__ == '__main__':
 import silenttestrunner
 silenttestrunner.main(__name__)
diff --git a/mercurial/linelog.py b/mercurial/linelog.py
--- a/mercurial/linelog.py
+++ b/mercurial/linelog.py
@@ -360,13 +360,15 @@
 def annotate(self, rev):
 pc = 1
 lines = []
-# Sanity check: if len(lines) is longer than len(program), we
+executed = 0
+# Sanity check: if instructions executed exceeds len(program), we
 # hit an infinite loop in the linelog program somehow and we
 # should stop.
-while pc is not None and len(lines) < len(self._program):
+while pc is not None and executed < len(self._program):
 inst = self._program[pc]
 lastpc = pc
 pc = inst.execute(rev, pc, lines.append)
+executed += 1
 if pc is not None:
 raise LineLogError(
 'Probably hit an infinite loop in linelog. Program:\n' +



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4151: linelog: fix infinite loop vulnerability

2018-08-06 Thread quark (Jun Wu)
quark updated this revision to Diff 10032.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4151?vs=10031&id=10032

REVISION DETAIL
  https://phab.mercurial-scm.org/D4151

AFFECTED FILES
  mercurial/linelog.py
  tests/test-linelog.py

CHANGE DETAILS

diff --git a/tests/test-linelog.py b/tests/test-linelog.py
--- a/tests/test-linelog.py
+++ b/tests/test-linelog.py
@@ -179,6 +179,15 @@
 ar = ll.annotate(rev)
 self.assertEqual([(l.rev, l.linenum) for l in ar], lines)
 
+def testinfinitebadprogram(self):
+ll = linelog.linelog.fromdata(
+b'\x00\x00\x00\x00\x00\x00\x00\x02'  # header
+b'\x00\x00\x00\x00\x00\x00\x00\x01'  # JUMP to self
+)
+with self.assertRaises(linelog.LineLogError):
+# should not be an infinite loop and raise
+ll.annotate(1)
+
 if __name__ == '__main__':
 import silenttestrunner
 silenttestrunner.main(__name__)
diff --git a/mercurial/linelog.py b/mercurial/linelog.py
--- a/mercurial/linelog.py
+++ b/mercurial/linelog.py
@@ -373,13 +373,15 @@
 def annotate(self, rev):
 pc = 1
 lines = []
-# Sanity check: if len(lines) is longer than len(program), we
+executed = 0
+# Sanity check: if instructions executed exceeds len(program), we
 # hit an infinite loop in the linelog program somehow and we
 # should stop.
-while pc is not None and len(lines) < len(self._program):
+while pc is not None and executed < len(self._program):
 inst = self._program[pc]
 lastpc = pc
 pc = inst.execute(rev, pc, lines.append)
+executed += 1
 if pc is not None:
 raise LineLogError(
 'Probably hit an infinite loop in linelog. Program:\n' +



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4151: linelog: fix infinite loop vulnerability

2018-08-06 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Checking `len(lines)` is not a great way of detecting infinite loops, as
  demonstrated in the added test. Therefore check instruction count instead.
  
  The original C implementation does not have this problem. There are a few
  other places where the C implementation enforces stricter checks, such as
  `a1 <= a2 <= len(lines)`, `b1 <= b2`, and `rev > 0`, but those are optional.

TEST PLAN
  Add a test. The old code forces the test to time out.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4151

AFFECTED FILES
  mercurial/linelog.py
  tests/test-linelog.py

CHANGE DETAILS

diff --git a/tests/test-linelog.py b/tests/test-linelog.py
--- a/tests/test-linelog.py
+++ b/tests/test-linelog.py
@@ -179,6 +179,15 @@
 ar = ll.annotate(rev)
 self.assertEqual([(l.rev, l.linenum) for l in ar], lines)
 
+def testinfinitebadprogram(self):
+ll = linelog.linelog.fromdata(
+b'\x00\x00\x00\x00\x00\x00\x00'  # header
+b'\x02\x00\x00\x00\x00\x00\x00\x00\x01'  # JUMP to self
+)
+with self.assertRaises(linelog.LineLogError):
+# should not be an infinite loop and raise
+ll.annotate(1)
+
 if __name__ == '__main__':
 import silenttestrunner
 silenttestrunner.main(__name__)
diff --git a/mercurial/linelog.py b/mercurial/linelog.py
--- a/mercurial/linelog.py
+++ b/mercurial/linelog.py
@@ -373,13 +373,15 @@
 def annotate(self, rev):
 pc = 1
 lines = []
-# Sanity check: if len(lines) is longer than len(program), we
+executed = 0
+# Sanity check: if instructions executed exceeds len(program), we
 # hit an infinite loop in the linelog program somehow and we
 # should stop.
-while pc is not None and len(lines) < len(self._program):
+while pc is not None and executed < len(self._program):
 inst = self._program[pc]
 lastpc = pc
 pc = inst.execute(rev, pc, lines.append)
+executed += 1
 if pc is not None:
 raise LineLogError(
 'Probably hit an infinite loop in linelog. Program:\n' +



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4147: linelog: update internal help text

2018-08-06 Thread quark (Jun Wu)
quark updated this revision to Diff 10027.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4147?vs=10022&id=10027

REVISION DETAIL
  https://phab.mercurial-scm.org/D4147

AFFECTED FILES
  mercurial/help/internals/linelog.txt

CHANGE DETAILS

diff --git a/mercurial/help/internals/linelog.txt 
b/mercurial/help/internals/linelog.txt
--- a/mercurial/help/internals/linelog.txt
+++ b/mercurial/help/internals/linelog.txt
@@ -112,26 +112,49 @@
 1. Interleaved insertions, or interleaved deletions.
It can be rewritten to a non-interleaved tree structure.
 
-   ^AI/D x ^AI/D x
-   ^AI/D y  -> ^AI/D y
-   ^AE x   ^AE y
-   ^AE y   ^AE x
+   Take insertions as example, deletions are similar:
+
+   ^AI x ^AI x
+   a a
+   ^AI x + 1  -> ^AI x + 1
+   b b
+   ^AE x ^AE x + 1
+   c ^AE x
+   ^AE x + 1 ^AI x + 1
+ c
+ ^AE x + 1
 
 2. Nested insertions, where the inner one has a smaller revision number.
+   Or nested deletions, where the inner one has a larger revision number.
It can be rewritten to a non-nested form.
 
+   Take insertions as example, deletions are similar:
+
^AI x + 1 ^AI x + 1
+   a a
^AI x  -> ^AE x + 1
-   ^AE x ^AI x
-   ^AE x + 1 ^AE x
+   b ^AI x
+   ^AE x b
+   c ^AE x
+   ^AE x + 1 ^AI x + 1
+ c
+ ^AE x + 1
+
+3. Insertion inside deletion with a smaller revision number.
 
-3. Insertion or deletion inside another deletion, where the outer deletion
-   block has a smaller revision number.
+   Rewrite by duplicating the content inserted:
 
^AD x  ^AD x
-   ^AI/D x + 1 -> ^AE x
-   ^AE x + 1  ^AI/D x + 1
-   ^AE x  ^AE x
+   a  a
+   ^AI x + 1  ->  b
+   b  c
+   ^AE x + 1  ^AE x
+   c  ^AI x + 1
+   ^AE x  b
+  ^AE x + 1
+
+   Note: If "annotate" purely depends on "^AI" information, then the
+   duplication content will lose track of where "b" is originally from.
 
   Some of them may be valid in other implementations for special purposes. For
   example, to "revive" a previously deleted block in a newer revision.
@@ -249,3 +272,31 @@
 
   "c" makes "hg absorb" easier to implement and makes it possible to do
   "annotate --deleted".
+
+1.4 Malformed Cases Handling
+
+  The following "case 1", "case 2", and "case 3" refer to cases mentioned
+  in section 0.5.
+
+  Using the exposed API (replacelines), case 1 is impossible to generate,
+  although it's possible to generate it by constructing rawdata and load that
+  via linelog.fromdata.
+
+  Doing annotate(maxrev) before replacelines (aka. a1, a2 passed to
+  replacelines are related to the latest revision) eliminates the possibility
+  of case 3. That makes sense since usually you'd like to make edits on top of
+  the latest revision. Practically, both absorb and fastannotate does this.
+
+  Doing annotate(maxrev), plus replacelines(rev, ...) where rev >= maxrev
+  eliminates the possibility of case 2. That makes sense since usually the
+  edits belong to "new revisions", not "old revisions". Practically,
+  fastannotate does this. Absorb calls replacelines with rev < maxrev to edit
+  past revisions. So it needs some extra care to not generate case 2.
+
+  If case 1 occurs, that probably means linelog file corruption (assuming
+  linelog is edited via public APIs) the checkout or annotate result could
+  be less meaningful or even error out, but linelog wouldn't enter an infinite
+  loop.
+
+  If either case 2 or 3 occurs, linelog works as if the inner "^AI/D" and "^AE"
+  operations on the left side are silently ignored.



To: quark, #hg-reviewers
Cc: martinvonz, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4149: linelog: extract `len(self._program)` to a local function

2018-08-06 Thread quark (Jun Wu)
quark updated this revision to Diff 10026.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4149?vs=10024&id=10026

REVISION DETAIL
  https://phab.mercurial-scm.org/D4149

AFFECTED FILES
  mercurial/linelog.py

CHANGE DETAILS

diff --git a/mercurial/linelog.py b/mercurial/linelog.py
--- a/mercurial/linelog.py
+++ b/mercurial/linelog.py
@@ -308,7 +308,8 @@
 else:
 a1info = ar.lines[a1]
 a1inst = self._program[a1info._offset]
-oldproglen = len(self._program)
+programlen = self._program.__len__
+oldproglen = programlen()
 appendinst = self._program.append
 
 # insert



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4148: perf: add a command to benchmark linelog edits

2018-08-06 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  A linelog is usually created by calling "replacelines" multiple times. Add a
  command to benchmark that usage pattern.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4148

AFFECTED FILES
  contrib/perf.py

CHANGE DETAILS

diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -889,6 +889,34 @@
 timer(lambda: len(repo.lookup(rev)))
 fm.end()
 
+@command('perflinelogedits',
+ [('n', 'edits', 1, 'number of edits'),
+  ('', 'max-hunk-lines', 10, 'max lines in a hunk'),
+ ], norepo=True)
+def perflinelogedits(ui, **opts):
+from mercurial import linelog
+
+edits = opts['edits']
+maxhunklines = opts['max_hunk_lines']
+
+def d():
+maxb1 = 10
+random.seed(0)
+randint = random.randint
+currentlines = 0
+ll = linelog.linelog()
+for rev in xrange(edits):
+a1 = randint(0, currentlines)
+a2 = randint(a1, min(currentlines, a1 + maxhunklines))
+b1 = randint(0, maxb1)
+b2 = randint(b1, b1 + maxhunklines)
+currentlines += (b2 - b1) - (a2 - a1)
+ll.replacelines(rev, a1, a2, b1, b2)
+
+timer, fm = gettimer(ui, opts)
+timer(d)
+fm.end()
+
 @command('perfrevrange', formatteropts)
 def perfrevrange(ui, repo, *specs, **opts):
 timer, fm = gettimer(ui, opts)



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4147: linelog: update internal help text

2018-08-06 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This clarifies the details asked about by @martinvonz on 
https://phab.mercurial-scm.org/D3990.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4147

AFFECTED FILES
  mercurial/help/internals/linelog.txt

CHANGE DETAILS

diff --git a/mercurial/help/internals/linelog.txt 
b/mercurial/help/internals/linelog.txt
--- a/mercurial/help/internals/linelog.txt
+++ b/mercurial/help/internals/linelog.txt
@@ -112,26 +112,49 @@
 1. Interleaved insertions, or interleaved deletions.
It can be rewritten to a non-interleaved tree structure.
 
-   ^AI/D x ^AI/D x
-   ^AI/D y  -> ^AI/D y
-   ^AE x   ^AE y
-   ^AE y   ^AE x
+   Take insertions as example, deletions are similar:
+
+   ^AI x ^AI x
+   a a
+   ^AI x + 1  -> ^AI x + 1
+   b b
+   ^AE x ^AE x + 1
+   c ^AE x
+   ^AE x + 1 ^AI x + 1
+ c
+ ^AE x + 1
 
 2. Nested insertions, where the inner one has a smaller revision number.
+   Or nested deletions, where the inner one has a larger revision number.
It can be rewritten to a non-nested form.
 
+   Take insertions as example, deletions are similar:
+
^AI x + 1 ^AI x + 1
+   a a
^AI x  -> ^AE x + 1
-   ^AE x ^AI x
-   ^AE x + 1 ^AE x
+   b ^AI x
+   ^AE x b
+   c ^AE x
+   ^AE x + 1 ^AI x + 1
+ c
+ ^AE x + 1
+
+3. Insertion inside deletion with a smaller revision number.
 
-3. Insertion or deletion inside another deletion, where the outer deletion
-   block has a smaller revision number.
+   Rewrite by duplicating the content inserted:
 
^AD x  ^AD x
-   ^AI/D x + 1 -> ^AE x
-   ^AE x + 1  ^AI/D x + 1
-   ^AE x  ^AE x
+   a  a
+   ^AI x + 1  ->  b
+   b  c
+   ^AE x + 1  ^AE x
+   c  ^AI x + 1
+   ^AE x  b
+  ^AE x + 1
+
+   Note: If "annotate purely depends on "^AI" information, then the
+   duplication content will lose track of where "b" is originally from.
 
   Some of them may be valid in other implementations for special purposes. For
   example, to "revive" a previously deleted block in a newer revision.
@@ -249,3 +272,31 @@
 
   "c" makes "hg absorb" easier to implement and makes it possible to do
   "annotate --deleted".
+
+1.4 Malformed Cases Handling
+
+  The following "case 1", "case 2", and "case 3" refer to cases mentioned
+  in section 0.5.
+
+  Using the exposed API (replacelines), case 1 is impossible to generate,
+  although it's possible to generate it by constructing rawdata and load that
+  via linelog.fromdata.
+
+  Doing annotate(maxrev) before replacelines (aka. a1, a2 passed to
+  replacelines are related to the latest revision) eliminates the possibility
+  of case 3. That makes sense since usually you'd like to make edits on top of
+  the latest revision. Practically, both absorb and fastannotate does this.
+
+  Doing annotate(maxrev), plus replacelines(rev, ...) where rev >= maxrev
+  eliminates the possibility of case 2. That makes sense since usually the
+  edits belong to "new revisions", not "old revisions". Practically,
+  fastannotate does this. Absorb calls replacelines with rev < maxrev to edit
+  past revisions. So it needs some extra care to not generate case 2.
+
+  If case 1 occurs, that probably means linelog file corruption (assuming
+  linelog is edited via public APIs) the checkout or annotate result could
+  be less meaningful or even error out, but linelog wouldn't enter an infinite
+  loop.
+
+  If either case 2 or 3 occurs, linelog works as if the inner "^AI/D" and "^AE"
+  operations on the left side are silently ignored.



To: quark, #hg-reviewers
Cc: martinvonz, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4150: linelog: optimize replacelines

2018-08-06 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  The optimization to avoid calling `annotate` inside replacelines is critical
  for practical uses.
  
  Before this patch:
  
hg perflinelogedits
! wall 10.883419 comb 10.77 user 10.67 sys 0.10 (best of 3)
  
  After this patch:
  
hg perflinelogedits
! wall 0.219167 comb 0.22 user 0.21 sys 0.01 (best of 39)

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4150

AFFECTED FILES
  mercurial/linelog.py

CHANGE DETAILS

diff --git a/mercurial/linelog.py b/mercurial/linelog.py
--- a/mercurial/linelog.py
+++ b/mercurial/linelog.py
@@ -313,17 +313,22 @@
 appendinst = self._program.append
 
 # insert
+blineinfos = []
+bappend = blineinfos.append
 if b1 < b2:
 # Determine the jump target for the JGE at the start of
 # the new block.
 tgt = oldproglen + (b2 - b1 + 1)
 # Jump to skip the insert if we're at an older revision.
 appendinst(_jl(rev, tgt))
 for linenum in pycompat.xrange(b1, b2):
 if _internal_blines is None:
+bappend(lineinfo(rev, linenum, programlen()))
 appendinst(_line(rev, linenum))
 else:
-appendinst(_line(*_internal_blines[linenum]))
+newrev, newlinenum = _internal_blines[linenum]
+bappend(lineinfo(newrev, newlinenum, programlen()))
+appendinst(_line(newrev, newlinenum))
 # delete
 if a1 < a2:
 if a2 > len(ar.lines):
@@ -342,19 +347,26 @@
 endaddr = ar.lines[a2 - 1]._offset + 1
 appendinst(_jge(rev, endaddr))
 # copy instruction from a1
+a1instpc = programlen()
 appendinst(a1inst)
 # if a1inst isn't a jump or EOF, then we need to add an unconditional
 # jump back into the program here.
 if not isinstance(a1inst, (_jump, _eof)):
 appendinst(_jump(0, a1info._offset + 1))
 # Patch instruction at a1, which makes our patch live.
 self._program[a1info._offset] = _jump(0, oldproglen)
-# For compat with the C version, re-annotate rev so that
-# self.annotateresult is cromulent.. We could fix up the
-# annotateresult in place (which is how the C version works),
-# but for now we'll pass on that and see if it matters in
-# practice.
-self.annotate(max(self._lastannotate.rev, rev))
+
+# Update self._lastannotate in place. This serves as a cache to avoid
+# expensive "self.annotate" in this function. It is critical to
+# performance.
+if len(self._lastannotate.lines) > a1:
+self._lastannotate.lines[a1]._offset = a1instpc
+else:
+assert isinstance(a1inst, _eof)
+self._lastannotate._eof = a1instpc
+self._lastannotate.lines[a1:a2] = blineinfos
+self._lastannotate.rev = max(self._lastannotate.rev, rev)
+
 if rev > self._maxrev:
 self._maxrev = rev
 



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4149: linelog: extract `len(self._program)` to a local function

2018-08-06 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This is a micro optimization prepared for following changes where
  `len(self._program)` is used in a loop.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4149

AFFECTED FILES
  mercurial/linelog.py

CHANGE DETAILS

diff --git a/mercurial/linelog.py b/mercurial/linelog.py
--- a/mercurial/linelog.py
+++ b/mercurial/linelog.py
@@ -296,6 +296,7 @@
 else:
 ar = self.annotate(rev)
 #ar = self.annotate(self._maxrev)
+programlen = self._program.__len__
 if a1 > len(ar.lines):
 raise LineLogError(
 '%d contains %d lines, tried to access line %d' % (
@@ -308,7 +309,7 @@
 else:
 a1info = ar.lines[a1]
 a1inst = self._program[a1info._offset]
-oldproglen = len(self._program)
+oldproglen = programlen()
 appendinst = self._program.append
 
 # insert



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4049: testrunner: allow multiple #testcases

2018-08-03 Thread quark (Jun Wu)
quark added a comment.


  `--stack` should work as expected if dependency is set manually.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4049

To: martinvonz, #hg-reviewers, mharbison72
Cc: quark, lothiraldan, mharbison72, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D4049: testrunner: allow multiple #testcases

2018-08-03 Thread quark (Jun Wu)
quark added a comment.


  In https://phab.mercurial-scm.org/D4049#63026, @martinvonz wrote:
  
  > Sorry, I was just misremembering that https://phab.mercurial-scm.org/D4052 
had been queued. I don't know why phabricator doesn't understand that this 
patch is on top of https://phab.mercurial-scm.org/D4052. Anyway, try applying 
https://phab.mercurial-scm.org/D4052 first, then this one.
  
  
  There is no formal API to set dependency. So you need to click "Edit Related 
Revisions... -> Edit Parent Revisions" on the right sidebar manually.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4049

To: martinvonz, #hg-reviewers, mharbison72
Cc: quark, lothiraldan, mharbison72, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3994: fastannotate: initial import from Facebook's hg-experimental

2018-08-02 Thread quark (Jun Wu)
quark added a comment.


  I'd also like to see C linelog benchmark data mentioned. The current commit 
message implies diff algorithm is the bottleneck. That's misleading.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3994

To: durin42, #hg-reviewers
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3994: fastannotate: initial import from Facebook's hg-experimental

2018-08-01 Thread quark (Jun Wu)
quark added a comment.


  I would mention in the commit message that building cache is much faster with 
linkrevcache prebuilt.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3994

To: durin42, #hg-reviewers
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3986: split: abort if there's an unfinished operation

2018-08-01 Thread quark (Jun Wu)
quark added a comment.


  I think the most flexible solution is to not do the check if there is nothing 
to rebase.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3986

To: martinvonz, #hg-reviewers
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3986: split: abort if there's an unfinished operation

2018-07-30 Thread quark (Jun Wu)
quark added a comment.


  FB has users reporting they need to split commits in the middle of a 
histedit. So this might be too restrictive.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3986

To: martinvonz, #hg-reviewers
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3980: contrib/phabricator: Convert description into local

2018-07-25 Thread quark (Jun Wu)
quark added a comment.


  Yeah, if only there were a `json.loadb` function. That could replace 
`json.loads` at line 211. I guess it could be done by using a function that 
recursively converts strings.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3980

To: ced, #hg-reviewers
Cc: quark, martinvonz, yuja, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3818: scmutil: make cleanupnodes optionally also fix the phase

2018-06-21 Thread quark (Jun Wu)
quark added a comment.


  Not directly related to this patch. On API complexity: One of the 
unimplemented ideas is to require a transaction and make `operation` optional - 
default to the transaction name.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3818

To: martinvonz, #hg-reviewers
Cc: quark, mharbison72, yuja, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3665: graph: improve graph output by using Unicode characters

2018-06-16 Thread quark (Jun Wu)
quark added a comment.


  In https://phab.mercurial-scm.org/D3665#58932, @johnstiles wrote:
  
  > Looks like your font is missing the dashed vertical line, and has an oddly 
small regular-circle glyph. I don't recognize the font at all so I can't really 
speak much more to that. Fortunately though...
  >
  > (a) it's an extension which isn't on by default
  >  (b) your font choices are your own. That doesn't look like any default 
font I've ever seen, so this is unlikely to affect many others
  
  
  Off topic. But there are many CJK fonts that you probably haven't seen. 
People using them are not a minority. So it's unfair to say "this is unlikely 
to affect many others".
  
  > I cannot possibly promise that this extension will look good in every OS, 
font and terminal. That's not a reasonable goal. If I thought looking perfect 
everywhere was an option, I'd be patching the mainline graph view instead of 
making an extension that opts-out by default.
  > 
  > Do you have any ideas to improve the situation? There has been plenty of 
font discussion in the thread already; this is not really covering any new 
ground.
  
  I believe the most "correct" solution is to revise the Unicode standard and 
upgrade fonts. But that cannot happen anytime soon. I do feel sad about the 
current situation.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3665

To: johnstiles, #hg-reviewers, spectral
Cc: quark, spectral, indygreg, smf, yuja, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3665: graph: improve graph output by using Unicode characters

2018-06-16 Thread quark (Jun Wu)
quark added a comment.


  Since you mentioned Linux... Here is what your extension renders on my Linux 
terminal:
  
  F103407: 2018-06-16-122437_1046x658_scrot.png 

  
  The characters do not render as question marks, because I have fonts covering 
them and fontconfig smartly chooses fallback fonts (Windows does similar things).
  
  I'm not saying others render the same as mine, but I'd repeat - "width" is 
per character, and is generally "undefined" - some of them are wide, some are 
narrow.
  
  You can say "it works great on *my* Linux", but not others' Linux. Depending 
on what fonts are installed, what the rendering engine is, what fontconfig says 
about font substitution rules, etc., there are just too many ways to get ugly 
results. And it still complies with the Unicode standard - since it's undefined.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3665

To: johnstiles, #hg-reviewers, spectral
Cc: quark, spectral, indygreg, smf, yuja, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3665: graph: improve graph output by using Unicode characters

2018-06-16 Thread quark (Jun Wu)
quark added a comment.


  To be clear, I have no interest in giving this feature a +1 or a -1, and I'm not 
interested in spending more time testing it. I think I have made it very clear 
that Windows (at least WSL) is going to be a headache. Not to mention, Linux (as my 
primary OS) font rendering is another story that might surprise you.
  
  At a low-level, the Unicode specification does not define some characters to 
be "wide" or "narrow" explicitly. Using them is like "undefined behavior" 
depending on the actual font. `encoding._wide` cannot be accurate for two 
reasons - 1. no way to get the font details; 2. width is per character, not a 
global thing.
  
  I have seen people testing software on macOS only, then think it improves 
everyone's life and proudly announce the feature. As a Linux/Windows user, I 
feel sad.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3665

To: johnstiles, #hg-reviewers, spectral
Cc: quark, spectral, indygreg, smf, yuja, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3665: graph: improve graph output by using Unicode characters

2018-06-16 Thread quark (Jun Wu)
quark added a comment.


  In https://phab.mercurial-scm.org/D3665#58864, @johnstiles wrote:
  
  > Are you capable of running the extension?
  >
  > The output of `type` is irrelevant to me. The behavior of python.exe when 
outputting to the Windows shell is all that really matters here. If 
`encoding.encoding` reports UTF8, it should work or there's an Hg issue. If it 
reports something else, the extension will disable itself anyway.
  
  
  Running hg installed in WSL inside cmd.exe, 
`encoding.encoding` is `utf-8`. Since it's still using the cmd.exe font, it cannot 
render those fancy characters correctly.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3665

To: johnstiles, #hg-reviewers, spectral
Cc: quark, spectral, indygreg, smf, yuja, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3665: graph: improve graph output by using Unicode characters

2018-06-16 Thread quark (Jun Wu)
quark added a comment.


  In https://phab.mercurial-scm.org/D3665#58862, @johnstiles wrote:
  
  > Without more context I have no idea what you are trying to show. Windows is
  >  certainly capable of rendering Unicode characters in the console. It is
  >  also very possible to get ? characters if you're running a
  >  non-Unicode-aware tool or if there are encoding mix-up issues. "type con"
  >  and pasting in text from a text editor doesn't really prove anything one
  >  way or the other. (What editor? What encoding did it think the data was?
  >  How does "type con" handle Unicode text? etc.)
  >
  > Run the actual hgext and take a screenshot of what it generates if you want
  >  to give a more useful data point for us.
  
  
  Copy-pasting would render utf-8 characters correctly if the font supports 
them. It does not seem to be related to the current codepage.
  
  I edited the screenshot to show an explicit `type a-utf-8-file` with "utf-8" 
(65001) codepage, with different fonts.
  
  Note: Mercurial will raise "unknown encoding: cp65001" in this case so it's 
probably safe if you have the `encoding.encoding != 'utf8'` check. But anything 
rendered using the utf-8 characters (be it from "git bash hg -G ... > a.txt" or 
whatever POSIX-like shells) will render sub-optimally in cmd.exe.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3665

To: johnstiles, #hg-reviewers, spectral
Cc: quark, spectral, indygreg, smf, yuja, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3665: graph: improve graph output by using Unicode characters

2018-06-16 Thread quark (Jun Wu)
quark added a comment.


  In https://phab.mercurial-scm.org/D3665#58860, @johnstiles wrote:
  
  > What are you trying to demonstrate here? I'm lost.
  
  
  I'm sorry you feel lost. Since Windows was mentioned in the thread and I 
happen to have a Windows system, I thought it's somehow useful to post a real 
cmd.exe screenshot with these characters - they do NOT render well. If that's 
not clear or is unrelated to the change, please ignore.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3665

To: johnstiles, #hg-reviewers, spectral
Cc: quark, spectral, indygreg, smf, yuja, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3665: graph: improve graph output by using Unicode characters

2018-06-16 Thread quark (Jun Wu)
quark added a comment.


  Maybe I should change cmd.exe font. But here's what I got pasting the text 
into the console:
  
  F103261: cmd.png 

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3665

To: johnstiles, #hg-reviewers, spectral
Cc: quark, spectral, indygreg, smf, yuja, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3737: crecord: fix line number in hunk header (issue5917)

2018-06-14 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG03350f5234a4: crecord: fix line number in hunk header 
(issue5917) (authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D3737?vs=9065&id=9066

REVISION DETAIL
  https://phab.mercurial-scm.org/D3737

AFFECTED FILES
  mercurial/crecord.py
  tests/test-commit-interactive-curses.t

CHANGE DETAILS

diff --git a/tests/test-commit-interactive-curses.t 
b/tests/test-commit-interactive-curses.t
--- a/tests/test-commit-interactive-curses.t
+++ b/tests/test-commit-interactive-curses.t
@@ -214,6 +214,14 @@
   @@ -0,0 +1,1 @@
   +hello world
 
+Make file empty
+  $ echo -n > x
+  $ cat  X
+  > EOF
+  $ hg ci -i -m emptify -d "0 0"
+  $ hg update -C '.^' -q
+
 Editing a hunk puts you back on that hunk when done editing (issue5041)
 To do that, we change two lines in a file, pretend to edit the second line,
 exit, toggle the line selected at the end of the edit and commit.
@@ -236,7 +244,7 @@
   > X
   > EOF
   $ printf "printf 'editor ran\n'; exit 0" > editor.sh
-  $ HGEDITOR="\"sh\" \"`pwd`/editor.sh\"" hg commit  -i -m "edit hunk" -d "0 0"
+  $ HGEDITOR="\"sh\" \"`pwd`/editor.sh\"" hg commit  -i -m "edit hunk" -d "0 
0" -q
   editor ran
   $ hg cat -r . x
   foo
diff --git a/mercurial/crecord.py b/mercurial/crecord.py
--- a/mercurial/crecord.py
+++ b/mercurial/crecord.py
@@ -398,7 +398,7 @@
 if fromline != 0:
 if fromlen == 0:
 fromline -= 1
-if tolen == 0:
+if tolen == 0 and toline > 0:
 toline -= 1
 
 fromtoline = '@@ -%d,%d +%d,%d @@%s\n' % (



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3737: crecord: fix line number in hunk header (issue5917)

2018-06-14 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  `@@ -1,1 +-1,0 @@` is not a valid patch hunk header.
  Change it to `@@ -1,1 +0,0 @@`.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3737

AFFECTED FILES
  mercurial/crecord.py
  tests/test-commit-interactive-curses.t

CHANGE DETAILS

diff --git a/tests/test-commit-interactive-curses.t 
b/tests/test-commit-interactive-curses.t
--- a/tests/test-commit-interactive-curses.t
+++ b/tests/test-commit-interactive-curses.t
@@ -214,6 +214,14 @@
   @@ -0,0 +1,1 @@
   +hello world
 
+Make file empty
+  $ echo -n > x
+  $ cat  X
+  > EOF
+  $ hg ci -i -m emptify -d "0 0"
+  $ hg update -C '.^' -q
+
 Editing a hunk puts you back on that hunk when done editing (issue5041)
 To do that, we change two lines in a file, pretend to edit the second line,
 exit, toggle the line selected at the end of the edit and commit.
@@ -236,7 +244,7 @@
   > X
   > EOF
   $ printf "printf 'editor ran\n'; exit 0" > editor.sh
-  $ HGEDITOR="\"sh\" \"`pwd`/editor.sh\"" hg commit  -i -m "edit hunk" -d "0 0"
+  $ HGEDITOR="\"sh\" \"`pwd`/editor.sh\"" hg commit  -i -m "edit hunk" -d "0 
0" -q
   editor ran
   $ hg cat -r . x
   foo
diff --git a/mercurial/crecord.py b/mercurial/crecord.py
--- a/mercurial/crecord.py
+++ b/mercurial/crecord.py
@@ -403,7 +403,7 @@
 if fromline != 0:
 if fromlen == 0:
 fromline -= 1
-if tolen == 0:
+if tolen == 0 and toline > 0:
 toline -= 1
 
 fromtoline = '@@ -%d,%d +%d,%d @@%s\n' % (



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3720: commandserver: close server's fds explicitly from a worker

2018-06-13 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHGc07424ec633c: commandserver: close servers fds 
explicitly from a worker (authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D3720?vs=9029&id=9042

REVISION DETAIL
  https://phab.mercurial-scm.org/D3720

AFFECTED FILES
  mercurial/commandserver.py

CHANGE DETAILS

diff --git a/mercurial/commandserver.py b/mercurial/commandserver.py
--- a/mercurial/commandserver.py
+++ b/mercurial/commandserver.py
@@ -494,6 +494,8 @@
 conn.close()  # release handle in parent process
 else:
 try:
+selector.close()
+self._sock.close()
 self._runworker(conn)
 conn.close()
 os._exit(0)



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3720: commandserver: close server's fds explicitly from a worker

2018-06-12 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  The forked worker does not need to accept connections from the server's
  socket fd. So let's just close them explicitly to avoid surprises.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3720

AFFECTED FILES
  mercurial/commandserver.py

CHANGE DETAILS

diff --git a/mercurial/commandserver.py b/mercurial/commandserver.py
--- a/mercurial/commandserver.py
+++ b/mercurial/commandserver.py
@@ -494,6 +494,8 @@
 conn.close()  # release handle in parent process
 else:
 try:
+selector.close()
+self._sock.close()
 self._runworker(conn)
 conn.close()
 os._exit(0)



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3698: chg: fix an undefined behavior about memcpy

2018-06-06 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG3c84493556db: chg: fix an undefined behavior about memcpy 
(authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D3698?vs=8989&id=8990

REVISION DETAIL
  https://phab.mercurial-scm.org/D3698

AFFECTED FILES
  contrib/chg/chg.c

CHANGE DETAILS

diff --git a/contrib/chg/chg.c b/contrib/chg/chg.c
--- a/contrib/chg/chg.c
+++ b/contrib/chg/chg.c
@@ -220,7 +220,10 @@
 
const char **argv = mallocx(sizeof(char *) * argsize);
memcpy(argv, baseargv, sizeof(baseargv));
-   memcpy(argv + baseargvsize, opts->args, sizeof(char *) * opts->argsize);
+   if (opts->args) {
+   size_t size = sizeof(char *) * opts->argsize;
+   memcpy(argv + baseargvsize, opts->args, size);
+   }
argv[argsize - 1] = NULL;
 
if (putenv("CHGINTERNALMARK=") != 0)



To: quark, #hg-reviewers
Cc: durin42, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3698: chg: fix an undefined behavior about memcpy

2018-06-06 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Spotted by Wez Furlong.  `memcpy(x, NULL, 0)` is undefined according to [1]:
  
  [1]: https://stackoverflow.com/questions/5243012

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3698

AFFECTED FILES
  contrib/chg/chg.c

CHANGE DETAILS

diff --git a/contrib/chg/chg.c b/contrib/chg/chg.c
--- a/contrib/chg/chg.c
+++ b/contrib/chg/chg.c
@@ -220,7 +220,10 @@
 
const char **argv = mallocx(sizeof(char *) * argsize);
memcpy(argv, baseargv, sizeof(baseargv));
-   memcpy(argv + baseargvsize, opts->args, sizeof(char *) * opts->argsize);
+   if (opts->args) {
+   size_t size = sizeof(char *) * opts->argsize;
+   memcpy(argv + baseargvsize, opts->args, size);
+   }
argv[argsize - 1] = NULL;
 
if (putenv("CHGINTERNALMARK=") != 0)



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3556: run-tests: update the test case name format

2018-05-14 Thread quark (Jun Wu)
quark accepted this revision.
quark added a comment.


  I like this change. Didn't realize it's this simple. Marking as accepted.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3556

To: lothiraldan, #hg-reviewers, quark
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3554: xdiff: fix Py_BuildValue types (issue5885)

2018-05-13 Thread quark (Jun Wu)
quark abandoned this revision.
quark added a comment.


  Duplicated with https://phab.mercurial-scm.org/D3538.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3554

To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3554: xdiff: fix Py_BuildValue types (issue5885)

2018-05-13 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Julien Cristau reported and provided the fix [1]. I added an assertion.
  
  Note Python documentation (2.7 to 3.5) says "L" in `Py_BuildValue` might be
  unsupported on some platforms:
  
L (int) [PY_LONG_LONG]
Convert a Python integer to a C long long. This format is only available
on platforms that support long long (or _int64 on Windows).
  
  But it is required to build Python 2.7 itself [2]. So using it won't affect
  supported platforms.
  
  [1]: https://bz.mercurial-scm.org/show_bug.cgi?id=5885#c3
  [2]: https://bugs.python.org/issue27961

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3554

AFFECTED FILES
  mercurial/cext/bdiff.c

CHANGE DETAILS

diff --git a/mercurial/cext/bdiff.c b/mercurial/cext/bdiff.c
--- a/mercurial/cext/bdiff.c
+++ b/mercurial/cext/bdiff.c
@@ -261,7 +261,8 @@
  void *priv)
 {
PyObject *rl = (PyObject *)priv;
-   PyObject *m = Py_BuildValue("", a1, a2, b1, b2);
+   Py_BUILD_ASSERT(sizeof(a1) == sizeof(PY_LONG_LONG));
+   PyObject *m = Py_BuildValue("", a1, a2, b1, b2);
if (!m)
return -1;
if (PyList_Append(rl, m) != 0) {



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3212: patch: implement a new worddiff algorithm

2018-04-16 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG35632d392279: patch: implement a new worddiff algorithm 
(authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D3212?vs=7924&id=8335

REVISION DETAIL
  https://phab.mercurial-scm.org/D3212

AFFECTED FILES
  mercurial/color.py
  mercurial/patch.py
  tests/test-diff-color.t

CHANGE DETAILS

diff --git a/tests/test-diff-color.t b/tests/test-diff-color.t
--- a/tests/test-diff-color.t
+++ b/tests/test-diff-color.t
@@ -337,41 +337,39 @@
   [diff.deleted|-(to see if it works)]
   [diff.inserted|+three of those lines have]
   [diff.inserted|+collapsed onto one]
-#if false
   $ hg diff --config experimental.worddiff=True --color=debug
   [diff.diffline|diff --git a/file1 b/file1]
   [diff.file_a|--- a/file1]
   [diff.file_b|+++ b/file1]
   [diff.hunk|@@ -1,16 +1,17 @@]
-  [diff.deleted|-this is the ][diff.deleted.highlight|first][diff.deleted| 
line]
-  [diff.deleted|-this is the second line]
-  [diff.deleted|-][diff.deleted.highlight|][diff.deleted|third line starts 
with space]
-  [diff.deleted|-][diff.deleted.highlight|+][diff.deleted| starts with a 
][diff.deleted.highlight|plus][diff.deleted| sign]
-  [diff.deleted|-][diff.tab|   ][diff.deleted|this one with 
][diff.deleted.highlight|one][diff.deleted| tab]
-  [diff.deleted|-][diff.tab|   ][diff.deleted|now with full 
][diff.deleted.highlight|two][diff.deleted| tabs]
-  [diff.deleted|-][diff.tab|   ][diff.deleted|now tabs][diff.tab|  
][diff.deleted|everywhere, much fun]
-  [diff.inserted|+that is the first paragraph]
-  [diff.inserted|+][diff.inserted.highlight|][diff.inserted|this is the 
][diff.inserted.highlight|second][diff.inserted| line]
-  [diff.inserted|+third line starts with space]
-  [diff.inserted|+][diff.inserted.highlight|-][diff.inserted| starts with a 
][diff.inserted.highlight|minus][diff.inserted| sign]
-  [diff.inserted|+][diff.tab|  ][diff.inserted|this one with 
][diff.inserted.highlight|two][diff.inserted| tab]
-  [diff.inserted|+][diff.tab|  ][diff.inserted|now with full 
][diff.inserted.highlight|three][diff.inserted| tabs]
-  [diff.inserted|+][diff.tab|  ][diff.inserted|now][diff.inserted.highlight| 
there are][diff.inserted| tabs][diff.tab| ][diff.inserted|everywhere, 
much fun]
+  [diff.deleted|-][diff.deleted.changed|this][diff.deleted.unchanged| is the 
first ][diff.deleted.changed|line]
+  [diff.deleted|-][diff.deleted.unchanged|this is the second line]
+  [diff.deleted|-][diff.deleted.changed|][diff.deleted.unchanged|third 
line starts with space]
+  [diff.deleted|-][diff.deleted.changed|+][diff.deleted.unchanged| starts with 
a ][diff.deleted.changed|plus][diff.deleted.unchanged| sign]
+  [diff.deleted|-][diff.tab|   ][diff.deleted.unchanged|this one with 
][diff.deleted.changed|one][diff.deleted.unchanged| tab]
+  [diff.deleted|-][diff.tab|   ][diff.deleted.unchanged|now with full 
][diff.deleted.changed|two][diff.deleted.unchanged| tabs]
+  [diff.deleted|-][diff.tab|   ][diff.deleted.unchanged|now 
][diff.deleted.unchanged|tabs][diff.tab|   
][diff.deleted.unchanged|everywhere, much fun]
+  [diff.inserted|+][diff.inserted.changed|that][diff.inserted.unchanged| is 
the first ][diff.inserted.changed|paragraph]
+  [diff.inserted|+][diff.inserted.changed|][diff.inserted.unchanged|this 
is the second line]
+  [diff.inserted|+][diff.inserted.unchanged|third line starts with space]
+  [diff.inserted|+][diff.inserted.changed|-][diff.inserted.unchanged| starts 
with a ][diff.inserted.changed|minus][diff.inserted.unchanged| sign]
+  [diff.inserted|+][diff.tab|  ][diff.inserted.unchanged|this one with 
][diff.inserted.changed|two][diff.inserted.unchanged| tab]
+  [diff.inserted|+][diff.tab|  ][diff.inserted.unchanged|now 
with full ][diff.inserted.changed|three][diff.inserted.unchanged| tabs]
+  [diff.inserted|+][diff.tab|  ][diff.inserted.unchanged|now 
][diff.inserted.changed|there are ][diff.inserted.unchanged|tabs][diff.tab| 
  ][diff.inserted.unchanged|everywhere, much fun]

this line won't change

two lines are going to
-  [diff.deleted|-be changed into 
][diff.deleted.highlight|three][diff.deleted|!]
-  [diff.inserted|+(entirely magically,]
-  [diff.inserted|+ assuming this works)]
-  [diff.inserted|+be changed into 
][diff.inserted.highlight|four][diff.inserted|!]
+  [diff.deleted|-][diff.deleted.unchanged|be changed into 
][diff.deleted.changed|three][diff.deleted.unchanged|!]
+  [diff.inserted|+][diff.inserted.changed|(entirely magically,]
+  [diff.inserted|+][diff.inserted.changed| assuming this works)]
+  [diff.inserted|+][diff.inserted.unchanged|be changed into 
][diff.inserted.changed|four][diff.inserted.unchanged|!]

-  [diff.deleted|-three of those lines ][diff.deleted.highlight|will]
-  

D3211: patch: buffer lines for a same hunk

2018-04-16 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG5471348921c1: patch: buffer lines for a same hunk (authored 
by quark, committed by ).

CHANGED PRIOR TO COMMIT
  https://phab.mercurial-scm.org/D3211?vs=7923&id=8334#toc

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D3211?vs=7923&id=8334

REVISION DETAIL
  https://phab.mercurial-scm.org/D3211

AFFECTED FILES
  mercurial/patch.py
  tests/test-diff-color.t

CHANGE DETAILS

diff --git a/tests/test-diff-color.t b/tests/test-diff-color.t
--- a/tests/test-diff-color.t
+++ b/tests/test-diff-color.t
@@ -337,6 +337,7 @@
   [diff.deleted|-(to see if it works)]
   [diff.inserted|+three of those lines have]
   [diff.inserted|+collapsed onto one]
+#if false
   $ hg diff --config experimental.worddiff=True --color=debug
   [diff.diffline|diff --git a/file1 b/file1]
   [diff.file_a|--- a/file1]
@@ -370,6 +371,7 @@
   [diff.deleted|-(to see if it works)]
   [diff.inserted|+three of those lines ][diff.inserted.highlight|have]
   [diff.inserted|+][diff.inserted.highlight|collapsed][diff.inserted| onto one]
+#endif
 
 multibyte character shouldn't be broken up in word diff:
 
@@ -383,10 +385,13 @@
   > f.write(b"blah \xe3\x82\xa4 blah\n")
   > EOF
   $ hg ci -m 'slightly change utf8 char' utf8
+
+#if false
   $ hg diff --config experimental.worddiff=True --color=debug -c.
   [diff.diffline|diff --git a/utf8 b/utf8]
   [diff.file_a|--- a/utf8]
   [diff.file_b|+++ b/utf8]
   [diff.hunk|@@ -1,1 +1,1 @@]
   [diff.deleted|-blah ][diff.deleted.highlight|\xe3\x82\xa2][diff.deleted| 
blah] (esc)
   [diff.inserted|+blah ][diff.inserted.highlight|\xe3\x82\xa4][diff.inserted| 
blah] (esc)
+#endif
diff --git a/mercurial/patch.py b/mercurial/patch.py
--- a/mercurial/patch.py
+++ b/mercurial/patch.py
@@ -11,7 +11,6 @@
 import collections
 import contextlib
 import copy
-import difflib
 import email
 import errno
 import hashlib
@@ -2481,11 +2480,32 @@
 else:
 return difffn(opts, None)
 
+def diffsinglehunk(hunklines):
+"""yield tokens for a list of lines in a single hunk"""
+for line in hunklines:
+# chomp
+chompline = line.rstrip('\n')
+# highlight tabs and trailing whitespace
+stripline = chompline.rstrip()
+if line[0] == '-':
+label = 'diff.deleted'
+elif line[0] == '+':
+label = 'diff.inserted'
+else:
+raise error.ProgrammingError('unexpected hunk line: %s' % line)
+for token in tabsplitter.findall(stripline):
+if '\t' == token[0]:
+yield (token, 'diff.tab')
+else:
+yield (token, label)
+
+if chompline != stripline:
+yield (chompline[len(stripline):], 'diff.trailingwhitespace')
+if chompline != line:
+yield (line[len(chompline):], '')
+
 def difflabel(func, *args, **kw):
 '''yields 2-tuples of (output, label) based on the output of func()'''
-inlinecolor = False
-if kw.get(r'opts'):
-inlinecolor = kw[r'opts'].worddiff
 headprefixes = [('diff', 'diff.diffline'),
 ('copy', 'diff.extended'),
 ('rename', 'diff.extended'),
@@ -2497,125 +2517,59 @@
 ('---', 'diff.file_a'),
 ('+++', 'diff.file_b')]
 textprefixes = [('@', 'diff.hunk'),
-('-', 'diff.deleted'),
-('+', 'diff.inserted')]
+# - and + are handled by diffsinglehunk
+   ]
 head = False
+
+# buffers a hunk, i.e. adjacent "-", "+" lines without other changes.
+hunkbuffer = []
+def consumehunkbuffer():
+if hunkbuffer:
+for token in diffsinglehunk(hunkbuffer):
+yield token
+hunkbuffer[:] = []
+
 for chunk in func(*args, **kw):
 lines = chunk.split('\n')
-matches = {}
-if inlinecolor:
-matches = _findmatches(lines)
 linecount = len(lines)
 for i, line in enumerate(lines):
 if head:
 if line.startswith('@'):
 head = False
 else:
 if line and not line.startswith((' ', '+', '-', '@', '\\')):
 head = True
-stripline = line
 diffline = False
 if not head and line and line.startswith(('+', '-')):
-# highlight tabs and trailing whitespace, but only in
-# changed lines
-stripline = line.rstrip()
 diffline = True
 
 prefixes = textprefixes
 if head:
 prefixes = headprefixes
-for prefix, label in prefixes:
-if stripline.startswith(prefix):
-if diffline:
-if i in matches:
-for t, l in _inlinediff(lines[i].rstrip(),
-

D3210: patch: move yielding "\n" to the end of loop

2018-04-16 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG8d730f96e792: patch: move yielding \n to the 
end of loop (authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D3210?vs=7922&id=8333

REVISION DETAIL
  https://phab.mercurial-scm.org/D3210

AFFECTED FILES
  mercurial/patch.py

CHANGE DETAILS

diff --git a/mercurial/patch.py b/mercurial/patch.py
--- a/mercurial/patch.py
+++ b/mercurial/patch.py
@@ -2505,9 +2505,8 @@
 matches = {}
 if inlinecolor:
 matches = _findmatches(lines)
+linecount = len(lines)
 for i, line in enumerate(lines):
-if i != 0:
-yield ('\n', '')
 if head:
 if line.startswith('@'):
 head = False
@@ -2546,6 +2545,8 @@
 yield (line, '')
 if line != stripline:
 yield (line[len(stripline):], 'diff.trailingwhitespace')
+if i + 1 < linecount:
+yield ('\n', '')
 
 def _findmatches(slist):
 '''Look for insertion matches to deletion and returns a dict of



To: quark, #hg-reviewers, durin42
Cc: yuja, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3212: patch: implement a new worddiff algorithm

2018-04-12 Thread quark (Jun Wu)
quark added a comment.


  I think one alternative is just to use `green_background`, like 
`git/contrib/diff-highlight/diff-highlight.perl` does. It satisfies all the 
properties I'd like to have, is supported by weird terminals including cmd.exe 
and less.exe, and is different from the `diff.file_a` and `diff.file_b` colors. 
I'll probably just use this.
  
  F71015: 2018-04-12-193346_956x996_scrot.png 


REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3212

To: quark, #hg-reviewers, durin42, yuja
Cc: indygreg, dhduvall, yuja, spectral, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3210: patch: move yielding "\n" to the end of loop

2018-04-11 Thread quark (Jun Wu)
quark added a comment.


  I thought it was `for line in mdiff.splitnewlines(...)`. If we have both 
`rawline` and `line` variables, then it is easier.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3210

To: quark, #hg-reviewers
Cc: yuja, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3212: patch: implement a new worddiff algorithm

2018-04-11 Thread quark (Jun Wu)
quark added a comment.


  In https://phab.mercurial-scm.org/D3212#51917, @yuja wrote:
  
  > Can you split a patch changing the color scheme so we can easily
  >  back it out as needed?
  
  
  Note the color configs are not entirely equivalent to the old code. To give 
an example:
  
-LINE-1
-LINE-2
-LINE-3
-LINE-4-FOO
+LINE-4-BAR
  
  The old code will use the "normal" color for LINE-1 to LINE-3, and highlight 
only "FOO" and "BAR". The new code will treat LINE-1 to LINE-3 and "FOO", "BAR" 
the same, because they are all the "changed" part, and treat "LINE-4-" 
differently. If we use "bold" for "changed", then it becomes much easier to end 
up with large hunks of code in bold, which will look noisy.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3212

To: quark, #hg-reviewers, durin42, yuja
Cc: indygreg, dhduvall, yuja, spectral, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3212: patch: implement a new worddiff algorithm

2018-04-10 Thread quark (Jun Wu)
quark added inline comments.

INLINE COMMENTS

> spectral wrote in color.py:94
> I understand the reasoning for wanting `diff.inserted` and 
> `diff.inserted.changed` to be the same color, but unfortunately I think it 
> might not be super feasible in upstream.
> 
> 'dim' support in my terminfo database (and via testing) is actually pretty 
> common among the biggest terminals; for oddities, I'm seeing:
> 
> - the linux console (very few people use it) has weird support
> - screen simulates it with underline, tmux passes it through (so I don't see 
> it)
> - rxvt-unicode doesn't support it
> - the terminfo profile for cygwin doesn't indicate support for it
> - Apple's terminal advertises itself as xterm-compatible and seems to support 
> it
> - iTerm2 similarly
> 
> I'm willing to be convinced it's OK, especially since this mode isn't the 
> default.  @dhduvall originally wrote the "not supported by many terminals" 
> bit, I wonder if they have any suggestions.  @indygreg has also dealt with 
> terminal issues (and deals with windows more than me, so might know more 
> there).

There are not many choices - dim, 16/256 colors, or bold. We ended up with 
16/256 colors internally for wider support (ex. tmux). But I'd like to express 
my (strong) opinions that:

- diff.inserted.changed and diff.inserted are same
- diff.inserted is not bold

in this patch. That is because color.py has no 16/256 color support yet (and 
if it did detection conservatively, most terminals would only report 8-color 
support), so the only remaining choice is "dim". For weird terminals like tmux, 
I think it's their bug to fix, not this patch's.

That said, I'm fine with changing the defaults to whatever. So feel free to 
send follow-ups changing it.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3212

To: quark, #hg-reviewers, durin42, yuja
Cc: indygreg, dhduvall, yuja, spectral, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3210: patch: move yielding "\n" to the end of loop

2018-04-10 Thread quark (Jun Wu)
quark added a comment.


  That's ideal. But a lot of code in this area expects "line" to not contain 
"\n". So the change won't be as easy as it looks.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3210

To: quark, #hg-reviewers
Cc: yuja, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3212: patch: implement a new worddiff algorithm

2018-04-10 Thread quark (Jun Wu)
quark added a comment.


  Git first had a `contrib/diff-highlight/diff-highlight` script which inverts 
foreground/background for hunks with len(deleted_lines) = len(inserted_lines).
  
  Then the latest version shows diff inline. That is:
  
common words [+inserted words with green color][-deleted words with red 
color] common words
  
  I dislike that, since it could be ambiguous ("[+ x]" could be part of the 
original text).
  
  For colors, it's really a hard question. I think if we can detect "dim" is 
unsupported and make it a no-op, then it'd be fine to use. `infocmp` can report 
"dim" correctly on my Linux terminal. Internally, we patched color.py to use 16 
and 256 colors even if terminfo reports 8 colors. But I guess that's not an 
acceptable solution here.

INLINE COMMENTS

> yuja wrote in patch.py:2536
> Nit: maybe we can sort out tokens here instead of re-parsing tabs, newlines, 
> trailing whitespaces later.
> 
> But I'm not sure if that will make things simpler.

For a split list `['\n', '\t', ' ']`, mdiff might return a hunk that joins 
them. So `''.join(al[a1:a2])` will become more complex.

I think having mdiff step free from EOL/tab handling makes the code easier to 
read.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3212

To: quark, #hg-reviewers, durin42, yuja
Cc: yuja, spectral, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3212: patch: implement a new worddiff algorithm

2018-04-09 Thread quark (Jun Wu)
quark added inline comments.

INLINE COMMENTS

> spectral wrote in color.py:94
> These are the first uses of 'dim' in the default set of things, and I don't 
> think we can rely on it; for color.mode=auto, we really mean "ansi" (aka 
> ecma48) (unless on windows), and don't do any detection of capabilities, so 
> we just output \e[2m and some terminals just ignore that (like mine, 
> rxvt-unicode v9.22).  If using color.mode=terminfo, we at least get error 
> messages (I did --config color.log.user='dim green'):
> 
> ignoring unknown color/effect 'dim' (configured in color.log.user)
> 
> Apparently cygwin doesn't advertise 'dim', and while the linux virtual 
> console advertises it and supports it, it doesn't actually support a dim 
> color (at least on my machine), it just always puts it in a weird blue :)
> 
> I think I'd prefer that `changed` be bold and `unchanged` be non-bold.  For 
> most terminals, that'll lead to a visible difference in intensity (bold being 
> brighter unless using a weird palette), and for those that aren't configured 
> for that, it'll at least be a heavier weight.  It's better than having 
> literally zero difference between them without any explanation why.  I think 
> it'll also be more obvious which lines have it; in your screenshot the 
> difference between dim and regular is pretty subtle.

As I mentioned in the summary, I believe `diff.inserted` and 
`diff.inserted.changed` should have the same color. And `diff.inserted` probably 
shouldn't be bold.

Looking at this review page, you will notice that `diff.inserted` and 
`diff.inserted.changed` use the same color, whereas `diff.inserted.unchanged` 
uses a different (lighter) background color.

`dim` works fine where it is supported. For terminals that do not support it, 
people can override the settings. For `dim` feature detection, that's an issue 
in the color code which is unrelated to this change. Since worddiff is 
experimental and off by default, I don't think dim detection should block this 
patch.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3212

To: quark, #hg-reviewers, durin42
Cc: spectral, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3209: amend: exit 0 if there are no changes

2018-04-09 Thread quark (Jun Wu)
quark added a comment.


  I think it depends on what scripts want to do. I guess mpm's original point 
is: suppose you have a build script, or something that should do nothing if 
nothing changed; then `hg amend && build_script` just works as expected. If 
amend returned 0, it would be more complex to detect the "nothing changed" case.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3209

To: spectral, #hg-reviewers
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3209: amend: exit 0 if there are no changes

2018-04-09 Thread quark (Jun Wu)
quark added a comment.


  Returning 1 is actually more consistent with other core commands like pull, 
push, commit. See 
https://www.mercurial-scm.org/pipermail/mercurial-devel/2012-January/037711.html.
 Scripts should be updated to use `$?` explicitly.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3209

To: spectral, #hg-reviewers
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2668: rebase: introduce support for automatically rebasing orphan changes

2018-04-09 Thread quark (Jun Wu)
quark added a subscriber: mbthomas.
quark added a comment.


  In https://phab.mercurial-scm.org/D2668#51423, @durin42 wrote:
  
  > I like the way `--auto` (or some other name) is discoverable in `hg help 
rebase`. Losing that inside the long-form prose of the help text (which is the 
only place I can think of to document a magic rebase-specific revset?) seems 
like a bummer to me...
  
  
  We recently had some internal discussion about discovery. @mbthomas mentioned 
that help text is a way of discovery and help text is not only about command 
line flags. i.e. there could be some examples in help text. I think that might 
be actually better not only because it keeps CLI clean, but also because a flag 
has limited (half of a line) space to explain things while an example could 
have multi-line explanation.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2668

To: durin42, #hg-reviewers, indygreg
Cc: mbthomas, spectral, quark, indygreg, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3212: patch: implement a new worddiff algorithm

2018-04-09 Thread quark (Jun Wu)
quark added a comment.


  This is the before and after comparison:
  
  F69700: worddiff-compare.png 

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3212

To: quark, #hg-reviewers, durin42
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3153: tests: call rawsize() directly

2018-04-06 Thread quark (Jun Wu)
quark added a comment.


  It was `revlog.revlog.size`, and got changed to `rawsize` before being committed.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3153

To: indygreg, #hg-reviewers
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3138: rebase: make "destnode" consistently a revnum and rename it to "destrev"

2018-04-05 Thread quark (Jun Wu)
quark accepted this revision.
quark added a comment.


  Thanks. I was not too happy about the old code using `rev` as `node`s. But I 
didn't spend much time changing them.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3138

To: martinvonz, #hg-reviewers, quark
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3139: rebase: convert "oldrev" to revnum earlier

2018-04-05 Thread quark (Jun Wu)
quark accepted this revision.
quark added a comment.


  Nice catch. I guess my initial consideration was `oldrev` was not always a 
valid node. But it seems no longer a concern.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3139

To: martinvonz, #hg-reviewers, quark
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D3140: rebase: fix explicit handling of nullid in rebase state

2018-04-05 Thread quark (Jun Wu)
quark accepted this revision.
quark added a comment.


  Hmm... maybe just remove it.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3140

To: martinvonz, #hg-reviewers, quark
Cc: quark, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2057: rust implementation of hg status

2018-03-20 Thread quark (Jun Wu)
quark added a comment.


  https://crates.io/crates/local-encoding seems to be the right choice.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2057

To: Ivzhh, #hg-reviewers, kevincox
Cc: quark, yuja, glandium, krbullock, indygreg, durin42, kevincox, 
mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2848: xdiff: move stdint.h to xdiff.h

2018-03-13 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHGafebb7588e95: xdiff: move stdint.h to xdiff.h (authored by 
quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2848?vs=7005&id=7006

REVISION DETAIL
  https://phab.mercurial-scm.org/D2848

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiff.h
  mercurial/thirdparty/xdiff/xinclude.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xinclude.h 
b/mercurial/thirdparty/xdiff/xinclude.h
--- a/mercurial/thirdparty/xdiff/xinclude.h
+++ b/mercurial/thirdparty/xdiff/xinclude.h
@@ -24,13 +24,6 @@
 #define XINCLUDE_H
 
 #include 
-#if !defined(_MSC_VER) || _MSC_VER >= 1600
-#include 
-#else
-/* prior to Visual Studio 2010 */
-typedef long long int64_t;
-typedef unsigned long long uint64_t;
-#endif
 #include 
 #include 
 #include 
diff --git a/mercurial/thirdparty/xdiff/xdiff.h 
b/mercurial/thirdparty/xdiff/xdiff.h
--- a/mercurial/thirdparty/xdiff/xdiff.h
+++ b/mercurial/thirdparty/xdiff/xdiff.h
@@ -29,6 +29,14 @@
 
 #include  /* size_t */
 
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
+#include 
+#else
+/* prior to Visual Studio 2010 */
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+#endif
+
 /* xpparm_t.flags */
 #define XDF_NEED_MINIMAL (1 << 0)
 



To: quark, #hg-reviewers, durin42
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2848: xdiff: move stdint.h to xdiff.h

2018-03-13 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  It's more correct to put it in xdiff.h since that file actually uses int64_t
  etc and xdiff.h is included by xinclude.h.
  
  This should fix the oss-fuzz build. Thanks durin42 for discovering the
  issue.

TEST PLAN
  `make local` and xdiff related tests still work.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2848

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiff.h
  mercurial/thirdparty/xdiff/xinclude.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xinclude.h 
b/mercurial/thirdparty/xdiff/xinclude.h
--- a/mercurial/thirdparty/xdiff/xinclude.h
+++ b/mercurial/thirdparty/xdiff/xinclude.h
@@ -24,13 +24,6 @@
 #define XINCLUDE_H
 
 #include 
-#if !defined(_MSC_VER) || _MSC_VER >= 1600
-#include 
-#else
-/* prior to Visual Studio 2010 */
-typedef long long int64_t;
-typedef unsigned long long uint64_t;
-#endif
 #include 
 #include 
 #include 
diff --git a/mercurial/thirdparty/xdiff/xdiff.h 
b/mercurial/thirdparty/xdiff/xdiff.h
--- a/mercurial/thirdparty/xdiff/xdiff.h
+++ b/mercurial/thirdparty/xdiff/xdiff.h
@@ -29,6 +29,14 @@
 
 #include  /* size_t */
 
+#if !defined(_MSC_VER) || _MSC_VER >= 1600
+#include 
+#else
+/* prior to Visual Studio 2010 */
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+#endif
+
 /* xpparm_t.flags */
 #define XDF_NEED_MINIMAL (1 << 0)
 



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH] xdiff: fix a hard crash on Windows

2018-03-12 Thread Jun Wu
Looks good. Thanks for fixing this!

Excerpts from Matt Harbison's message of 2018-03-12 21:53:12 -0400:
> # HG changeset patch
> # User Matt Harbison 
> # Date 1520905818 14400
> #  Mon Mar 12 21:50:18 2018 -0400
> # Node ID 60bb2f7dd9ba313f96374470e8419bf1a20454a1
> # Parent  aed445748c7885482cd90e56e81f57a13d4ac95c
> xdiff: fix a hard crash on Windows
> 
> The xdiff case of test-diff-antipatience.t started crashing in the C extension
> with 882657a9f768 (with 6a71a5ba666b backported so it compiles).  There are a
> few more instances of 'long', but this resolves the crashing.
> 
> diff --git a/mercurial/thirdparty/xdiff/xdiffi.c 
> b/mercurial/thirdparty/xdiff/xdiffi.c
> --- a/mercurial/thirdparty/xdiff/xdiffi.c
> +++ b/mercurial/thirdparty/xdiff/xdiffi.c
> @@ -342,7 +342,7 @@ int xdl_do_diff(mmfile_t *mf1, mmfile_t 
>   * One is to store the forward path and one to store the backward path.
>   */
>  ndiags = xe->xdf1.nreff + xe->xdf2.nreff + 3;
> -if (!(kvd = (int64_t *) xdl_malloc((2 * ndiags + 2) * sizeof(long {
> +if (!(kvd = (int64_t *) xdl_malloc((2 * ndiags + 2) * sizeof(int64_t 
> {
>  
>  xdl_free_env(xe);
>  return -1;
> diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
> b/mercurial/thirdparty/xdiff/xprepare.c
> --- a/mercurial/thirdparty/xdiff/xprepare.c
> +++ b/mercurial/thirdparty/xdiff/xprepare.c
> @@ -296,9 +296,9 @@ static int xdl_prepare_ctx(unsigned int 
>  goto abort;
>  memset(rchg, 0, (nrec + 2) * sizeof(char));
>  
> -if (!(rindex = (int64_t *) xdl_malloc((nrec + 1) * sizeof(long
> +if (!(rindex = (int64_t *) xdl_malloc((nrec + 1) * sizeof(int64_t
>  goto abort;
> -if (!(ha = (uint64_t *) xdl_malloc((nrec + 1) * sizeof(unsigned long
> +if (!(ha = (uint64_t *) xdl_malloc((nrec + 1) * sizeof(uint64_t
>  goto abort;
>  
>  xdf->nrec = nrec;
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2763: xdiff: remove unused flags parameter

2018-03-09 Thread quark (Jun Wu)
quark added a comment.


  I don't think the Python ".so"s should be consumed by non-Python "dlopen", so 
"version" doesn't change, since the Python API remains the same.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2763

To: quark, #hg-reviewers, indygreg
Cc: indygreg, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2765: xdiff: use int64 for hash table size

2018-03-09 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG71fbceb58746: xdiff: use int64 for hash table size 
(authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2765?vs=6802&id=6809

REVISION DETAIL
  https://phab.mercurial-scm.org/D2765

AFFECTED FILES
  mercurial/thirdparty/xdiff/xprepare.c
  mercurial/thirdparty/xdiff/xutils.c
  mercurial/thirdparty/xdiff/xutils.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xutils.h 
b/mercurial/thirdparty/xdiff/xutils.h
--- a/mercurial/thirdparty/xdiff/xutils.h
+++ b/mercurial/thirdparty/xdiff/xutils.h
@@ -32,7 +32,7 @@
 int64_t xdl_guess_lines(mmfile_t *mf, int64_t sample);
 int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2);
 uint64_t xdl_hash_record(char const **data, char const *top);
-unsigned int xdl_hashbits(unsigned int size);
+unsigned int xdl_hashbits(int64_t size);
 
 
 
diff --git a/mercurial/thirdparty/xdiff/xutils.c 
b/mercurial/thirdparty/xdiff/xutils.c
--- a/mercurial/thirdparty/xdiff/xutils.c
+++ b/mercurial/thirdparty/xdiff/xutils.c
@@ -141,9 +141,10 @@
return ha;
 }
 
-unsigned int xdl_hashbits(unsigned int size) {
-   unsigned int val = 1, bits = 0;
+unsigned int xdl_hashbits(int64_t size) {
+   int64_t val = 1;
+   unsigned int bits = 0;
 
-   for (; val < size && bits < CHAR_BIT * sizeof(unsigned int); val <<= 1, 
bits++);
+   for (; val < size && bits < (int64_t) CHAR_BIT * sizeof(unsigned int); 
val <<= 1, bits++);
return bits ? bits: 1;
 }
diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -70,7 +70,7 @@
 static int xdl_init_classifier(xdlclassifier_t *cf, int64_t size, int64_t 
flags) {
cf->flags = flags;
 
-   cf->hbits = xdl_hashbits((unsigned int) size);
+   cf->hbits = xdl_hashbits(size);
cf->hsize = 1 << cf->hbits;
 
if (xdl_cha_init(>ncha, sizeof(xdlclass_t), size / 4 + 1) < 0) {
@@ -262,7 +262,7 @@
goto abort;
 
{
-   hbits = xdl_hashbits((unsigned int) narec);
+   hbits = xdl_hashbits(narec);
hsize = 1 << hbits;
if (!(rhash = (xrecord_t **) xdl_malloc(hsize * 
sizeof(xrecord_t *
goto abort;



To: quark, #hg-reviewers, indygreg
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2762: xdiff: replace {unsigned ,}long with {u,}int64_t

2018-03-09 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHGe882437cc082: xdiff: replace {unsigned ,}long with 
{u,}int64_t (authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2762?vs=6799&id=6806

REVISION DETAIL
  https://phab.mercurial-scm.org/D2762

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiff.h
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xdiffi.h
  mercurial/thirdparty/xdiff/xinclude.h
  mercurial/thirdparty/xdiff/xprepare.c
  mercurial/thirdparty/xdiff/xtypes.h
  mercurial/thirdparty/xdiff/xutils.c
  mercurial/thirdparty/xdiff/xutils.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xutils.h 
b/mercurial/thirdparty/xdiff/xutils.h
--- a/mercurial/thirdparty/xdiff/xutils.h
+++ b/mercurial/thirdparty/xdiff/xutils.h
@@ -25,13 +25,13 @@
 
 
 
-long xdl_bogosqrt(long n);
-int xdl_cha_init(chastore_t *cha, long isize, long icount);
+int64_t xdl_bogosqrt(int64_t n);
+int xdl_cha_init(chastore_t *cha, int64_t isize, int64_t icount);
 void xdl_cha_free(chastore_t *cha);
 void *xdl_cha_alloc(chastore_t *cha);
-long xdl_guess_lines(mmfile_t *mf, long sample);
-int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
-unsigned long xdl_hash_record(char const **data, char const *top, long flags);
+int64_t xdl_guess_lines(mmfile_t *mf, int64_t sample);
+int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2, 
int64_t flags);
+uint64_t xdl_hash_record(char const **data, char const *top, int64_t flags);
 unsigned int xdl_hashbits(unsigned int size);
 
 
diff --git a/mercurial/thirdparty/xdiff/xutils.c 
b/mercurial/thirdparty/xdiff/xutils.c
--- a/mercurial/thirdparty/xdiff/xutils.c
+++ b/mercurial/thirdparty/xdiff/xutils.c
@@ -27,8 +27,8 @@
 
 
 
-long xdl_bogosqrt(long n) {
-   long i;
+int64_t xdl_bogosqrt(int64_t n) {
+   int64_t i;
 
/*
 * Classical integer square root approximation using shifts.
@@ -40,20 +40,20 @@
 }
 
 
-void *xdl_mmfile_first(mmfile_t *mmf, long *size)
+void *xdl_mmfile_first(mmfile_t *mmf, int64_t *size)
 {
*size = mmf->size;
return mmf->ptr;
 }
 
 
-long xdl_mmfile_size(mmfile_t *mmf)
+int64_t xdl_mmfile_size(mmfile_t *mmf)
 {
return mmf->size;
 }
 
 
-int xdl_cha_init(chastore_t *cha, long isize, long icount) {
+int xdl_cha_init(chastore_t *cha, int64_t isize, int64_t icount) {
 
cha->head = cha->tail = NULL;
cha->isize = isize;
@@ -100,8 +100,8 @@
return data;
 }
 
-long xdl_guess_lines(mmfile_t *mf, long sample) {
-   long nl = 0, size, tsize = 0;
+int64_t xdl_guess_lines(mmfile_t *mf, int64_t sample) {
+   int64_t nl = 0, size, tsize = 0;
char const *data, *cur, *top;
 
if ((cur = data = xdl_mmfile_first(mf, )) != NULL) {
@@ -121,15 +121,15 @@
return nl + 1;
 }
 
-int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
+int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2, 
int64_t flags)
 {
if (s1 == s2 && !memcmp(l1, l2, s1))
return 1;
return 0;
 }
 
-unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
-   unsigned long ha = 5381;
+uint64_t xdl_hash_record(char const **data, char const *top, int64_t flags) {
+   uint64_t ha = 5381;
char const *ptr = *data;
 
for (; ptr < top && *ptr != '\n'; ptr++) {
diff --git a/mercurial/thirdparty/xdiff/xtypes.h 
b/mercurial/thirdparty/xdiff/xtypes.h
--- a/mercurial/thirdparty/xdiff/xtypes.h
+++ b/mercurial/thirdparty/xdiff/xtypes.h
@@ -27,30 +27,30 @@
 
 typedef struct s_chanode {
struct s_chanode *next;
-   long icurr;
+   int64_t icurr;
 } chanode_t;
 
 typedef struct s_chastore {
chanode_t *head, *tail;
-   long isize, nsize;
+   int64_t isize, nsize;
chanode_t *ancur;
chanode_t *sncur;
-   long scurr;
+   int64_t scurr;
 } chastore_t;
 
 typedef struct s_xrecord {
struct s_xrecord *next;
char const *ptr;
-   long size;
-   unsigned long ha;
+   int64_t size;
+   uint64_t ha;
 } xrecord_t;
 
 typedef struct s_xdfile {
/* manual memory management */
chastore_t rcha;
 
/* number of records (lines) */
-   long nrec;
+   int64_t nrec;
 
/* hash table size
 * the maximum hash value in the table is (1 << hbits) */
@@ -64,7 +64,7 @@
 * [recs[i] for i in range(0, dstart)] are common prefix.
 * [recs[i] for i in range(dstart, dend + 1 - dstart)] are interesting
 * lines */
-   long dstart, dend;
+   int64_t dstart, dend;
 
/* pointer to records (lines) */
xrecord_t **recs;
@@ -82,22 +82,22 @@
 * rindex[0] is likely dstart, if not removed up by rule 2.
 * rindex[nreff - 1] is likely dend, if not removed by rule 2.
 */
-   long *rindex;
+   

D2763: xdiff: remove unused flags parameter

2018-03-09 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG4c8ffc67bac2: xdiff: remove unused flags parameter 
(authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2763?vs=6800&id=6808

REVISION DETAIL
  https://phab.mercurial-scm.org/D2763

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xprepare.c
  mercurial/thirdparty/xdiff/xutils.c
  mercurial/thirdparty/xdiff/xutils.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xutils.h 
b/mercurial/thirdparty/xdiff/xutils.h
--- a/mercurial/thirdparty/xdiff/xutils.h
+++ b/mercurial/thirdparty/xdiff/xutils.h
@@ -30,8 +30,8 @@
 void xdl_cha_free(chastore_t *cha);
 void *xdl_cha_alloc(chastore_t *cha);
 int64_t xdl_guess_lines(mmfile_t *mf, int64_t sample);
-int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2, 
int64_t flags);
-uint64_t xdl_hash_record(char const **data, char const *top, int64_t flags);
+int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2);
+uint64_t xdl_hash_record(char const **data, char const *top);
 unsigned int xdl_hashbits(unsigned int size);
 
 
diff --git a/mercurial/thirdparty/xdiff/xutils.c 
b/mercurial/thirdparty/xdiff/xutils.c
--- a/mercurial/thirdparty/xdiff/xutils.c
+++ b/mercurial/thirdparty/xdiff/xutils.c
@@ -121,14 +121,14 @@
return nl + 1;
 }
 
-int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2, 
int64_t flags)
+int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2)
 {
if (s1 == s2 && !memcmp(l1, l2, s1))
return 1;
return 0;
 }
 
-uint64_t xdl_hash_record(char const **data, char const *top, int64_t flags) {
+uint64_t xdl_hash_record(char const **data, char const *top) {
uint64_t ha = 5381;
char const *ptr = *data;
 
diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -118,7 +118,7 @@
for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next)
if (rcrec->ha == rec->ha &&
xdl_recmatch(rcrec->line, rcrec->size,
-   rec->ptr, rec->size, cf->flags))
+   rec->ptr, rec->size))
break;
 
if (!rcrec) {
@@ -273,7 +273,7 @@
if ((cur = blk = xdl_mmfile_first(mf, )) != NULL) {
for (top = blk + bsize; cur < top; ) {
prev = cur;
-   hav = xdl_hash_record(, top, xpp->flags);
+   hav = xdl_hash_record(, top);
if (nrec >= narec) {
narec *= 2;
if (!(rrecs = (xrecord_t **) xdl_realloc(recs, 
narec * sizeof(xrecord_t *
diff --git a/mercurial/thirdparty/xdiff/xdiffi.c 
b/mercurial/thirdparty/xdiff/xdiffi.c
--- a/mercurial/thirdparty/xdiff/xdiffi.c
+++ b/mercurial/thirdparty/xdiff/xdiffi.c
@@ -398,12 +398,11 @@
 }
 
 
-static int recs_match(xrecord_t *rec1, xrecord_t *rec2, int64_t flags)
+static int recs_match(xrecord_t *rec1, xrecord_t *rec2)
 {
return (rec1->ha == rec2->ha &&
xdl_recmatch(rec1->ptr, rec1->size,
-rec2->ptr, rec2->size,
-flags));
+rec2->ptr, rec2->size));
 }
 
 /*
@@ -762,10 +761,10 @@
  * following group, expand this group to include it. Return 0 on success or -1
  * if g cannot be slid down.
  */
-static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, int64_t flags)
+static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g)
 {
if (g->end < xdf->nrec &&
-   recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) {
+   recs_match(xdf->recs[g->start], xdf->recs[g->end])) {
xdf->rchg[g->start++] = 0;
xdf->rchg[g->end++] = 1;
 
@@ -783,10 +782,10 @@
  * into a previous group, expand this group to include it. Return 0 on success
  * or -1 if g cannot be slid up.
  */
-static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, int64_t flags)
+static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g)
 {
if (g->start > 0 &&
-   recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) {
+   recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1])) {
xdf->rchg[--g->start] = 1;
xdf->rchg[--g->end] = 0;
 
@@ -847,7 +846,7 @@
end_matching_other = -1;
 
/* Shift the group backward as much as possible: */
-   while (!group_slide_up(xdf, , flags))
+   while (!group_slide_up(xdf, ))
if (group_previous(xdfo, ))

D2686: xdiff: add a preprocessing step that trims files

2018-03-09 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG665958f30789: xdiff: add a preprocessing step that trims 
files (authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2686?vs=6797&id=6804

REVISION DETAIL
  https://phab.mercurial-scm.org/D2686

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xprepare.c
  mercurial/thirdparty/xdiff/xtypes.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xtypes.h 
b/mercurial/thirdparty/xdiff/xtypes.h
--- a/mercurial/thirdparty/xdiff/xtypes.h
+++ b/mercurial/thirdparty/xdiff/xtypes.h
@@ -60,6 +60,10 @@
 
 typedef struct s_xdfenv {
xdfile_t xdf1, xdf2;
+
+   /* number of lines for common prefix and suffix that are removed
+* from xdf1 and xdf2 as a preprocessing step */
+   long nprefix, nsuffix;
 } xdfenv_t;
 
 
diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -156,6 +156,87 @@
 }
 
 
+/*
+ * Trim common prefix from files.
+ *
+ * Note: trimming could affect hunk shifting. But the performance benefit
+ * outweighs the shift change. A diff result with suboptimal shifting is still
+ * valid.
+ */
+static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, long reserved,
+   xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) {
+   mmfile_t msmall, mlarge;
+   /* prefix lines, prefix bytes, suffix lines, suffix bytes */
+   long plines = 0, pbytes = 0, slines = 0, sbytes = 0, i;
+   /* prefix char pointer for msmall and mlarge */
+   const char *pp1, *pp2;
+   /* suffix char pointer for msmall and mlarge */
+   const char *ps1, *ps2;
+
+   /* reserved must >= 0 for the line boundary adjustment to work */
+   if (reserved < 0)
+   reserved = 0;
+
+   if (mf1->size < mf2->size) {
+   memcpy(, mf1, sizeof(mmfile_t));
+   memcpy(, mf2, sizeof(mmfile_t));
+   } else {
+   memcpy(, mf2, sizeof(mmfile_t));
+   memcpy(, mf1, sizeof(mmfile_t));
+   }
+
+   pp1 = msmall.ptr, pp2 = mlarge.ptr;
+   for (i = 0; i < msmall.size && *pp1 == *pp2; ++i) {
+   plines += (*pp1 == '\n');
+   pp1++, pp2++;
+   }
+
+   ps1 = msmall.ptr + msmall.size - 1, ps2 = mlarge.ptr + mlarge.size - 1;
+   while (ps1 > pp1 && *ps1 == *ps2) {
+   slines += (*ps1 == '\n');
+   ps1--, ps2--;
+   }
+
+   /* Retract common prefix and suffix boundaries for reserved lines */
+   if (plines <= reserved + 1) {
+   plines = 0;
+   } else {
+   i = 0;
+   while (i <= reserved) {
+   pp1--;
+   i += (*pp1 == '\n');
+   }
+   /* The new mmfile starts at the next char just after '\n' */
+   pbytes = pp1 - msmall.ptr + 1;
+   plines -= reserved;
+   }
+
+   if (slines <= reserved + 1) {
+   slines = 0;
+   } else {
+   /* Note: with compiler SIMD support (ex. -O3 -mavx2), this
+* might perform better than memchr. */
+   i = 0;
+   while (i <= reserved) {
+   ps1++;
+   i += (*ps1 == '\n');
+   }
+   /* The new mmfile includes this '\n' */
+   sbytes = msmall.ptr + msmall.size - ps1 - 1;
+   slines -= reserved;
+   if (msmall.ptr[msmall.size - 1] == '\n')
+   slines -= 1;
+   }
+
+   xe->nprefix = plines;
+   xe->nsuffix = slines;
+   out_mf1->ptr = mf1->ptr + pbytes;
+   out_mf1->size = mf1->size - pbytes - sbytes;
+   out_mf2->ptr = mf2->ptr + pbytes;
+   out_mf2->size = mf2->size - pbytes - sbytes;
+}
+
+
 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, 
xpparam_t const *xpp,
   xdlclassifier_t *cf, xdfile_t *xdf) {
unsigned int hbits;
@@ -254,10 +335,13 @@
xdl_cha_free(>rcha);
 }
 
+/* Reserved lines for trimming, to leave room for shifting */
+#define TRIM_RESERVED_LINES 100
 
 int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdfenv_t *xe) {
long enl1, enl2, sample;
+   mmfile_t tmf1, tmf2;
xdlclassifier_t cf;
 
memset(, 0, sizeof(cf));
@@ -270,12 +354,14 @@
if (xdl_init_classifier(, enl1 + enl2 + 1, xpp->flags) < 0)
return -1;
 
-   if (xdl_prepare_ctx(1, mf1, enl1, xpp, , >xdf1) < 0) {
+   xdl_trim_files(mf1, mf2, TRIM_RESERVED_LINES, xe, , );
+
+   if (xdl_prepare_ctx(1, , enl1, xpp, , >xdf1) < 0) {
 
xdl_free_classifier();
return -1;
}
-   if 

D2764: xdiff: remove unused xpp and xecfg parameters

2018-03-09 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG2e2b48cca761: xdiff: remove unused xpp and xecfg parameters 
(authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2764?vs=6801&id=6807

REVISION DETAIL
  https://phab.mercurial-scm.org/D2764

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xprepare.c

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -56,7 +56,7 @@
 static void xdl_free_classifier(xdlclassifier_t *cf);
 static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, 
xrecord_t **rhash,
   unsigned int hbits, xrecord_t *rec);
-static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec, 
xpparam_t const *xpp,
+static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec,
   xdlclassifier_t *cf, xdfile_t *xdf);
 static void xdl_free_ctx(xdfile_t *xdf);
 static int xdl_clean_mmatch(char const *dis, int64_t i, int64_t s, int64_t e);
@@ -237,7 +237,7 @@
 }
 
 
-static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec, 
xpparam_t const *xpp,
+static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec,
   xdlclassifier_t *cf, xdfile_t *xdf) {
unsigned int hbits;
int64_t nrec, hsize, bsize;
@@ -356,12 +356,12 @@
 
xdl_trim_files(mf1, mf2, TRIM_RESERVED_LINES, xe, , );
 
-   if (xdl_prepare_ctx(1, , enl1, xpp, , >xdf1) < 0) {
+   if (xdl_prepare_ctx(1, , enl1, , >xdf1) < 0) {
 
xdl_free_classifier();
return -1;
}
-   if (xdl_prepare_ctx(2, , enl2, xpp, , >xdf2) < 0) {
+   if (xdl_prepare_ctx(2, , enl2, , >xdf2) < 0) {
 
xdl_free_ctx(>xdf1);
xdl_free_classifier();
diff --git a/mercurial/thirdparty/xdiff/xdiffi.c 
b/mercurial/thirdparty/xdiff/xdiffi.c
--- a/mercurial/thirdparty/xdiff/xdiffi.c
+++ b/mercurial/thirdparty/xdiff/xdiffi.c
@@ -1012,7 +1012,7 @@
  * inside the differential hunk according to the specified configuration.
  * Also advance xscr if the first changes must be discarded.
  */
-xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
+xdchange_t *xdl_get_hunk(xdchange_t **xscr)
 {
xdchange_t *xch, *xchp, *lxch;
int64_t max_common = 0;
@@ -1070,7 +1070,7 @@
if ((xecfg->flags & XDL_EMIT_BDIFFHUNK) != 0) {
int64_t i1 = 0, i2 = 0, n1 = xe->xdf1.nrec, n2 = xe->xdf2.nrec;
for (xch = xscr; xch; xch = xche->next) {
-   xche = xdl_get_hunk(, xecfg);
+   xche = xdl_get_hunk();
if (!xch)
break;
if (xch != xche)
@@ -1089,7 +1089,7 @@
return -1;
} else {
for (xch = xscr; xch; xch = xche->next) {
-   xche = xdl_get_hunk(, xecfg);
+   xche = xdl_get_hunk();
if (!xch)
break;
if (xecfg->hunk_func(xch->i1 + p,



To: quark, #hg-reviewers, indygreg
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2685: xdiff: add comments for fields in xdfile_t

2018-03-09 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG58028f6d1fb8: xdiff: add comments for fields in xdfile_t 
(authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2685?vs=6798&id=6805

REVISION DETAIL
  https://phab.mercurial-scm.org/D2685

AFFECTED FILES
  mercurial/thirdparty/xdiff/xtypes.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xtypes.h 
b/mercurial/thirdparty/xdiff/xtypes.h
--- a/mercurial/thirdparty/xdiff/xtypes.h
+++ b/mercurial/thirdparty/xdiff/xtypes.h
@@ -46,15 +46,49 @@
 } xrecord_t;
 
 typedef struct s_xdfile {
+   /* manual memory management */
chastore_t rcha;
+
+   /* number of records (lines) */
long nrec;
+
+   /* hash table size
+* the maximum hash value in the table is (1 << hbits) */
unsigned int hbits;
+
+   /* hash table, hash value => xrecord_t
+* note: xrecord_t is a linked list. */
xrecord_t **rhash;
+
+   /* range excluding common prefix and suffix
+* [recs[i] for i in range(0, dstart)] are common prefix.
+* [recs[i] for i in range(dstart, dend + 1 - dstart)] are interesting
+* lines */
long dstart, dend;
+
+   /* pointer to records (lines) */
xrecord_t **recs;
+
+   /* record changed, use original "recs" index
+* rchag[i] can be either 0 or 1. 1 means recs[i] (line i) is marked
+* "changed". */
char *rchg;
+
+   /* cleaned-up record index => original "recs" index
+* clean-up means:
+*  rule 1. remove common prefix and suffix
+*  rule 2. remove records that are only on one side, since they can
+*  not match the other side
+* rindex[0] is likely dstart, if not removed up by rule 2.
+* rindex[nreff - 1] is likely dend, if not removed by rule 2.
+*/
long *rindex;
+
+   /* rindex size */
long nreff;
+
+   /* cleaned-up record index => hash value
+* ha[i] = recs[rindex[i]]->ha */
unsigned long *ha;
 } xdfile_t;
 



To: quark, #hg-reviewers, indygreg
Cc: indygreg, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2764: xdiff: remove unused xpp and xecfg parameters

2018-03-09 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  The xpp and xecfg parameters are unused, so remove them.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2764

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xprepare.c

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -56,7 +56,7 @@
 static void xdl_free_classifier(xdlclassifier_t *cf);
 static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, 
xrecord_t **rhash,
   unsigned int hbits, xrecord_t *rec);
-static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec, 
xpparam_t const *xpp,
+static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec,
   xdlclassifier_t *cf, xdfile_t *xdf);
 static void xdl_free_ctx(xdfile_t *xdf);
 static int xdl_clean_mmatch(char const *dis, int64_t i, int64_t s, int64_t e);
@@ -237,7 +237,7 @@
 }
 
 
-static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec, 
xpparam_t const *xpp,
+static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, int64_t narec,
   xdlclassifier_t *cf, xdfile_t *xdf) {
unsigned int hbits;
int64_t nrec, hsize, bsize;
@@ -356,12 +356,12 @@
 
xdl_trim_files(mf1, mf2, TRIM_RESERVED_LINES, xe, , );
 
-   if (xdl_prepare_ctx(1, , enl1, xpp, , >xdf1) < 0) {
+   if (xdl_prepare_ctx(1, , enl1, , >xdf1) < 0) {
 
xdl_free_classifier();
return -1;
}
-   if (xdl_prepare_ctx(2, , enl2, xpp, , >xdf2) < 0) {
+   if (xdl_prepare_ctx(2, , enl2, , >xdf2) < 0) {
 
xdl_free_ctx(>xdf1);
xdl_free_classifier();
diff --git a/mercurial/thirdparty/xdiff/xdiffi.c 
b/mercurial/thirdparty/xdiff/xdiffi.c
--- a/mercurial/thirdparty/xdiff/xdiffi.c
+++ b/mercurial/thirdparty/xdiff/xdiffi.c
@@ -1012,7 +1012,7 @@
  * inside the differential hunk according to the specified configuration.
  * Also advance xscr if the first changes must be discarded.
  */
-xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
+xdchange_t *xdl_get_hunk(xdchange_t **xscr)
 {
xdchange_t *xch, *xchp, *lxch;
int64_t max_common = 0;
@@ -1070,7 +1070,7 @@
if ((xecfg->flags & XDL_EMIT_BDIFFHUNK) != 0) {
int64_t i1 = 0, i2 = 0, n1 = xe->xdf1.nrec, n2 = xe->xdf2.nrec;
for (xch = xscr; xch; xch = xche->next) {
-   xche = xdl_get_hunk(, xecfg);
+   xche = xdl_get_hunk();
if (!xch)
break;
if (xch != xche)
@@ -1089,7 +1089,7 @@
return -1;
} else {
for (xch = xscr; xch; xch = xche->next) {
-   xche = xdl_get_hunk(, xecfg);
+   xche = xdl_get_hunk();
if (!xch)
break;
if (xecfg->hunk_func(xch->i1 + p,



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2765: xdiff: use int64 for hash table size

2018-03-09 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Follow-up of the previous "long" -> "int64" change. Now xdiff only uses int
  for return values and small integers (e.g. booleans, shifting score, bits in
  hash table size, etc.), so it should be able to handle large input.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2765

AFFECTED FILES
  mercurial/thirdparty/xdiff/xprepare.c
  mercurial/thirdparty/xdiff/xutils.c
  mercurial/thirdparty/xdiff/xutils.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xutils.h 
b/mercurial/thirdparty/xdiff/xutils.h
--- a/mercurial/thirdparty/xdiff/xutils.h
+++ b/mercurial/thirdparty/xdiff/xutils.h
@@ -32,7 +32,7 @@
 int64_t xdl_guess_lines(mmfile_t *mf, int64_t sample);
 int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2);
 uint64_t xdl_hash_record(char const **data, char const *top);
-unsigned int xdl_hashbits(unsigned int size);
+unsigned int xdl_hashbits(int64_t size);
 
 
 
diff --git a/mercurial/thirdparty/xdiff/xutils.c 
b/mercurial/thirdparty/xdiff/xutils.c
--- a/mercurial/thirdparty/xdiff/xutils.c
+++ b/mercurial/thirdparty/xdiff/xutils.c
@@ -141,9 +141,10 @@
return ha;
 }
 
-unsigned int xdl_hashbits(unsigned int size) {
-   unsigned int val = 1, bits = 0;
+unsigned int xdl_hashbits(int64_t size) {
+   int64_t val = 1;
+   unsigned int bits = 0;
 
-   for (; val < size && bits < CHAR_BIT * sizeof(unsigned int); val <<= 1, 
bits++);
+   for (; val < size && bits < (int64_t) CHAR_BIT * sizeof(unsigned int); 
val <<= 1, bits++);
return bits ? bits: 1;
 }
diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -70,7 +70,7 @@
 static int xdl_init_classifier(xdlclassifier_t *cf, int64_t size, int64_t 
flags) {
cf->flags = flags;
 
-   cf->hbits = xdl_hashbits((unsigned int) size);
+   cf->hbits = xdl_hashbits(size);
cf->hsize = 1 << cf->hbits;
 
if (xdl_cha_init(>ncha, sizeof(xdlclass_t), size / 4 + 1) < 0) {
@@ -262,7 +262,7 @@
goto abort;
 
{
-   hbits = xdl_hashbits((unsigned int) narec);
+   hbits = xdl_hashbits(narec);
hsize = 1 << hbits;
if (!(rhash = (xrecord_t **) xdl_malloc(hsize * 
sizeof(xrecord_t *
goto abort;



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2762: xdiff: replace {unsigned ,}long with {u,}int64_t

2018-03-09 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  MSVC treats "long" as 4-byte. That could cause overflows since the xdiff
  code uses "long" in places where "size_t" or "ssize_t" should be used.
  Let's use explicit 8-byte integers to avoid that.
  
  FWIW git avoids that overflow by limiting diff size to 1GB [1]. After
  examining the code, I think the remaining risk (the use of "int") is low
  since "int" is only used for return values and hash table size. A wrong
  hash table size would not affect the correctness of the code, but it
  could make the code extremely slow. The next patch will change the hash
  table size to an 8-byte integer, so the 1GB limit is unlikely to be needed.
  
  This patch was done by using `sed`.
  
  [1]: https://github.com/git/git/commit/dcd1742e56ebb944c4ff62346da4548e1e3be67

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2762

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiff.h
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xdiffi.h
  mercurial/thirdparty/xdiff/xinclude.h
  mercurial/thirdparty/xdiff/xprepare.c
  mercurial/thirdparty/xdiff/xtypes.h
  mercurial/thirdparty/xdiff/xutils.c
  mercurial/thirdparty/xdiff/xutils.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xutils.h 
b/mercurial/thirdparty/xdiff/xutils.h
--- a/mercurial/thirdparty/xdiff/xutils.h
+++ b/mercurial/thirdparty/xdiff/xutils.h
@@ -25,13 +25,13 @@
 
 
 
-long xdl_bogosqrt(long n);
-int xdl_cha_init(chastore_t *cha, long isize, long icount);
+int64_t xdl_bogosqrt(int64_t n);
+int xdl_cha_init(chastore_t *cha, int64_t isize, int64_t icount);
 void xdl_cha_free(chastore_t *cha);
 void *xdl_cha_alloc(chastore_t *cha);
-long xdl_guess_lines(mmfile_t *mf, long sample);
-int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
-unsigned long xdl_hash_record(char const **data, char const *top, long flags);
+int64_t xdl_guess_lines(mmfile_t *mf, int64_t sample);
+int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2, 
int64_t flags);
+uint64_t xdl_hash_record(char const **data, char const *top, int64_t flags);
 unsigned int xdl_hashbits(unsigned int size);
 
 
diff --git a/mercurial/thirdparty/xdiff/xutils.c 
b/mercurial/thirdparty/xdiff/xutils.c
--- a/mercurial/thirdparty/xdiff/xutils.c
+++ b/mercurial/thirdparty/xdiff/xutils.c
@@ -27,8 +27,8 @@
 
 
 
-long xdl_bogosqrt(long n) {
-   long i;
+int64_t xdl_bogosqrt(int64_t n) {
+   int64_t i;
 
/*
 * Classical integer square root approximation using shifts.
@@ -40,20 +40,20 @@
 }
 
 
-void *xdl_mmfile_first(mmfile_t *mmf, long *size)
+void *xdl_mmfile_first(mmfile_t *mmf, int64_t *size)
 {
*size = mmf->size;
return mmf->ptr;
 }
 
 
-long xdl_mmfile_size(mmfile_t *mmf)
+int64_t xdl_mmfile_size(mmfile_t *mmf)
 {
return mmf->size;
 }
 
 
-int xdl_cha_init(chastore_t *cha, long isize, long icount) {
+int xdl_cha_init(chastore_t *cha, int64_t isize, int64_t icount) {
 
cha->head = cha->tail = NULL;
cha->isize = isize;
@@ -100,8 +100,8 @@
return data;
 }
 
-long xdl_guess_lines(mmfile_t *mf, long sample) {
-   long nl = 0, size, tsize = 0;
+int64_t xdl_guess_lines(mmfile_t *mf, int64_t sample) {
+   int64_t nl = 0, size, tsize = 0;
char const *data, *cur, *top;
 
if ((cur = data = xdl_mmfile_first(mf, )) != NULL) {
@@ -121,15 +121,15 @@
return nl + 1;
 }
 
-int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
+int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2, 
int64_t flags)
 {
if (s1 == s2 && !memcmp(l1, l2, s1))
return 1;
return 0;
 }
 
-unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
-   unsigned long ha = 5381;
+uint64_t xdl_hash_record(char const **data, char const *top, int64_t flags) {
+   uint64_t ha = 5381;
char const *ptr = *data;
 
for (; ptr < top && *ptr != '\n'; ptr++) {
diff --git a/mercurial/thirdparty/xdiff/xtypes.h 
b/mercurial/thirdparty/xdiff/xtypes.h
--- a/mercurial/thirdparty/xdiff/xtypes.h
+++ b/mercurial/thirdparty/xdiff/xtypes.h
@@ -27,30 +27,30 @@
 
 typedef struct s_chanode {
struct s_chanode *next;
-   long icurr;
+   int64_t icurr;
 } chanode_t;
 
 typedef struct s_chastore {
chanode_t *head, *tail;
-   long isize, nsize;
+   int64_t isize, nsize;
chanode_t *ancur;
chanode_t *sncur;
-   long scurr;
+   int64_t scurr;
 } chastore_t;
 
 typedef struct s_xrecord {
struct s_xrecord *next;
char const *ptr;
-   long size;
-   unsigned long ha;
+   int64_t size;
+   uint64_t ha;
 } xrecord_t;
 
 typedef struct s_xdfile {
/* manual memory management */
chastore_t rcha;
 
/* number of records (lines) */
-   long nrec;
+  

D2763: xdiff: remove unused flags parameter

2018-03-09 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  After https://phab.mercurial-scm.org/D2683, the flags parameter in some
  functions is no longer needed, so remove it.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2763

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xprepare.c
  mercurial/thirdparty/xdiff/xutils.c
  mercurial/thirdparty/xdiff/xutils.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xutils.h 
b/mercurial/thirdparty/xdiff/xutils.h
--- a/mercurial/thirdparty/xdiff/xutils.h
+++ b/mercurial/thirdparty/xdiff/xutils.h
@@ -30,8 +30,8 @@
 void xdl_cha_free(chastore_t *cha);
 void *xdl_cha_alloc(chastore_t *cha);
 int64_t xdl_guess_lines(mmfile_t *mf, int64_t sample);
-int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2, 
int64_t flags);
-uint64_t xdl_hash_record(char const **data, char const *top, int64_t flags);
+int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2);
+uint64_t xdl_hash_record(char const **data, char const *top);
 unsigned int xdl_hashbits(unsigned int size);
 
 
diff --git a/mercurial/thirdparty/xdiff/xutils.c 
b/mercurial/thirdparty/xdiff/xutils.c
--- a/mercurial/thirdparty/xdiff/xutils.c
+++ b/mercurial/thirdparty/xdiff/xutils.c
@@ -121,14 +121,14 @@
return nl + 1;
 }
 
-int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2, 
int64_t flags)
+int xdl_recmatch(const char *l1, int64_t s1, const char *l2, int64_t s2)
 {
if (s1 == s2 && !memcmp(l1, l2, s1))
return 1;
return 0;
 }
 
-uint64_t xdl_hash_record(char const **data, char const *top, int64_t flags) {
+uint64_t xdl_hash_record(char const **data, char const *top) {
uint64_t ha = 5381;
char const *ptr = *data;
 
diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -118,7 +118,7 @@
for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next)
if (rcrec->ha == rec->ha &&
xdl_recmatch(rcrec->line, rcrec->size,
-   rec->ptr, rec->size, cf->flags))
+   rec->ptr, rec->size))
break;
 
if (!rcrec) {
@@ -273,7 +273,7 @@
if ((cur = blk = xdl_mmfile_first(mf, )) != NULL) {
for (top = blk + bsize; cur < top; ) {
prev = cur;
-   hav = xdl_hash_record(, top, xpp->flags);
+   hav = xdl_hash_record(, top);
if (nrec >= narec) {
narec *= 2;
if (!(rrecs = (xrecord_t **) xdl_realloc(recs, 
narec * sizeof(xrecord_t *
diff --git a/mercurial/thirdparty/xdiff/xdiffi.c 
b/mercurial/thirdparty/xdiff/xdiffi.c
--- a/mercurial/thirdparty/xdiff/xdiffi.c
+++ b/mercurial/thirdparty/xdiff/xdiffi.c
@@ -398,12 +398,11 @@
 }
 
 
-static int recs_match(xrecord_t *rec1, xrecord_t *rec2, int64_t flags)
+static int recs_match(xrecord_t *rec1, xrecord_t *rec2)
 {
return (rec1->ha == rec2->ha &&
xdl_recmatch(rec1->ptr, rec1->size,
-rec2->ptr, rec2->size,
-flags));
+rec2->ptr, rec2->size));
 }
 
 /*
@@ -762,10 +761,10 @@
  * following group, expand this group to include it. Return 0 on success or -1
  * if g cannot be slid down.
  */
-static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g, int64_t flags)
+static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g)
 {
if (g->end < xdf->nrec &&
-   recs_match(xdf->recs[g->start], xdf->recs[g->end], flags)) {
+   recs_match(xdf->recs[g->start], xdf->recs[g->end])) {
xdf->rchg[g->start++] = 0;
xdf->rchg[g->end++] = 1;
 
@@ -783,10 +782,10 @@
  * into a previous group, expand this group to include it. Return 0 on success
  * or -1 if g cannot be slid up.
  */
-static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g, int64_t flags)
+static int group_slide_up(xdfile_t *xdf, struct xdlgroup *g)
 {
if (g->start > 0 &&
-   recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1], flags)) {
+   recs_match(xdf->recs[g->start - 1], xdf->recs[g->end - 1])) {
xdf->rchg[--g->start] = 1;
xdf->rchg[--g->end] = 0;
 
@@ -847,7 +846,7 @@
end_matching_other = -1;
 
/* Shift the group backward as much as possible: */
-   while (!group_slide_up(xdf, , flags))
+   while (!group_slide_up(xdf, ))
if (group_previous(xdfo, ))

D2766: xdiff: resolve signed unsigned comparison warning

2018-03-09 Thread quark (Jun Wu)
quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Since the value won't be changed inside the code (because the context lines
  feature was removed by https://phab.mercurial-scm.org/D2705), let's just
  remove the variable and inline the 0 value.
  
  The code could potentially be simplified further, but I'd like to keep
  correctness easily verifiable in this patch.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2766

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiffi.c

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xdiffi.c 
b/mercurial/thirdparty/xdiff/xdiffi.c
--- a/mercurial/thirdparty/xdiff/xdiffi.c
+++ b/mercurial/thirdparty/xdiff/xdiffi.c
@@ -1015,16 +1015,14 @@
 xdchange_t *xdl_get_hunk(xdchange_t **xscr)
 {
xdchange_t *xch, *xchp, *lxch;
-   int64_t max_common = 0;
-   int64_t max_ignorable = 0;
uint64_t ignored = 0; /* number of ignored blank lines */
 
/* remove ignorable changes that are too far before other changes */
for (xchp = *xscr; xchp && xchp->ignore; xchp = xchp->next) {
xch = xchp->next;
 
if (xch == NULL ||
-   xch->i1 - (xchp->i1 + xchp->chg1) >= max_ignorable)
+   xch->i1 - (xchp->i1 + xchp->chg1) >= 0)
*xscr = xch;
}
 
@@ -1035,16 +1033,16 @@
 
for (xchp = *xscr, xch = xchp->next; xch; xchp = xch, xch = xch->next) {
int64_t distance = xch->i1 - (xchp->i1 + xchp->chg1);
-   if (distance > max_common)
+   if (distance > 0)
break;
 
-   if (distance < max_ignorable && (!xch->ignore || lxch == xchp)) 
{
+   if (distance < 0 && (!xch->ignore || lxch == xchp)) {
lxch = xch;
ignored = 0;
-   } else if (distance < max_ignorable && xch->ignore) {
+   } else if (distance < 0 && xch->ignore) {
ignored += xch->chg2;
} else if (lxch != xchp &&
-  xch->i1 + ignored - (lxch->i1 + lxch->chg1) > 
max_common) {
+  xch->i1 + ignored - (lxch->i1 + lxch->chg1) > 0) {
break;
} else if (!xch->ignore) {
lxch = xch;



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2686: xdiff: add a preprocessing step that trims files

2018-03-09 Thread quark (Jun Wu)
quark updated this revision to Diff 6797.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2686?vs=6708&id=6797

REVISION DETAIL
  https://phab.mercurial-scm.org/D2686

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xprepare.c
  mercurial/thirdparty/xdiff/xtypes.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xtypes.h 
b/mercurial/thirdparty/xdiff/xtypes.h
--- a/mercurial/thirdparty/xdiff/xtypes.h
+++ b/mercurial/thirdparty/xdiff/xtypes.h
@@ -60,6 +60,10 @@
 
 typedef struct s_xdfenv {
xdfile_t xdf1, xdf2;
+
+   /* number of lines for common prefix and suffix that are removed
+* from xdf1 and xdf2 as a preprocessing step */
+   long nprefix, nsuffix;
 } xdfenv_t;
 
 
diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -156,6 +156,87 @@
 }
 
 
+/*
+ * Trim common prefix from files.
+ *
+ * Note: trimming could affect hunk shifting. But the performance benefit
+ * outweighs the shift change. A diff result with suboptimal shifting is still
+ * valid.
+ */
+static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, long reserved,
+   xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) {
+   mmfile_t msmall, mlarge;
+   /* prefix lines, prefix bytes, suffix lines, suffix bytes */
+   long plines = 0, pbytes = 0, slines = 0, sbytes = 0, i;
+   /* prefix char pointer for msmall and mlarge */
+   const char *pp1, *pp2;
+   /* suffix char pointer for msmall and mlarge */
+   const char *ps1, *ps2;
+
+   /* reserved must >= 0 for the line boundary adjustment to work */
+   if (reserved < 0)
+   reserved = 0;
+
+   if (mf1->size < mf2->size) {
+   memcpy(&msmall, mf1, sizeof(mmfile_t));
+   memcpy(&mlarge, mf2, sizeof(mmfile_t));
+   } else {
+   memcpy(&msmall, mf2, sizeof(mmfile_t));
+   memcpy(&mlarge, mf1, sizeof(mmfile_t));
+   }
+
+   pp1 = msmall.ptr, pp2 = mlarge.ptr;
+   for (i = 0; i < msmall.size && *pp1 == *pp2; ++i) {
+   plines += (*pp1 == '\n');
+   pp1++, pp2++;
+   }
+
+   ps1 = msmall.ptr + msmall.size - 1, ps2 = mlarge.ptr + mlarge.size - 1;
+   while (ps1 > pp1 && *ps1 == *ps2) {
+   slines += (*ps1 == '\n');
+   ps1--, ps2--;
+   }
+
+   /* Retract common prefix and suffix boundaries for reserved lines */
+   if (plines <= reserved + 1) {
+   plines = 0;
+   } else {
+   i = 0;
+   while (i <= reserved) {
+   pp1--;
+   i += (*pp1 == '\n');
+   }
+   /* The new mmfile starts at the next char just after '\n' */
+   pbytes = pp1 - msmall.ptr + 1;
+   plines -= reserved;
+   }
+
+   if (slines <= reserved + 1) {
+   slines = 0;
+   } else {
+   /* Note: with compiler SIMD support (ex. -O3 -mavx2), this
+* might perform better than memchr. */
+   i = 0;
+   while (i <= reserved) {
+   ps1++;
+   i += (*ps1 == '\n');
+   }
+   /* The new mmfile includes this '\n' */
+   sbytes = msmall.ptr + msmall.size - ps1 - 1;
+   slines -= reserved;
+   if (msmall.ptr[msmall.size - 1] == '\n')
+   slines -= 1;
+   }
+
+   xe->nprefix = plines;
+   xe->nsuffix = slines;
+   out_mf1->ptr = mf1->ptr + pbytes;
+   out_mf1->size = mf1->size - pbytes - sbytes;
+   out_mf2->ptr = mf2->ptr + pbytes;
+   out_mf2->size = mf2->size - pbytes - sbytes;
+}
+
+
 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, 
xpparam_t const *xpp,
   xdlclassifier_t *cf, xdfile_t *xdf) {
unsigned int hbits;
@@ -254,10 +335,13 @@
xdl_cha_free(&xdf->rcha);
 }
 
+/* Reserved lines for trimming, to leave room for shifting */
+#define TRIM_RESERVED_LINES 100
 
 int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdfenv_t *xe) {
long enl1, enl2, sample;
+   mmfile_t tmf1, tmf2;
xdlclassifier_t cf;
 
memset(&cf, 0, sizeof(cf));
@@ -270,12 +354,14 @@
if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
return -1;
 
-   if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
+   xdl_trim_files(mf1, mf2, TRIM_RESERVED_LINES, xe, &tmf1, &tmf2);
+
+   if (xdl_prepare_ctx(1, &tmf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
 
xdl_free_classifier(&cf);
return -1;
}
-   if (xdl_prepare_ctx(2, mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
+   if (xdl_prepare_ctx(2, &tmf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
 
xdl_free_ctx(&xe->xdf1);

D2685: xdiff: add comments for fields in xdfile_t

2018-03-09 Thread quark (Jun Wu)
quark updated this revision to Diff 6798.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2685?vs=6643&id=6798

REVISION DETAIL
  https://phab.mercurial-scm.org/D2685

AFFECTED FILES
  mercurial/thirdparty/xdiff/xtypes.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xtypes.h 
b/mercurial/thirdparty/xdiff/xtypes.h
--- a/mercurial/thirdparty/xdiff/xtypes.h
+++ b/mercurial/thirdparty/xdiff/xtypes.h
@@ -46,15 +46,49 @@
 } xrecord_t;
 
 typedef struct s_xdfile {
+   /* manual memory management */
chastore_t rcha;
+
+   /* number of records (lines) */
long nrec;
+
+   /* hash table size
+* the maximum hash value in the table is (1 << hbits) */
unsigned int hbits;
+
+   /* hash table, hash value => xrecord_t
+* note: xrecord_t is a linked list. */
xrecord_t **rhash;
+
+   /* range excluding common prefix and suffix
+* [recs[i] for i in range(0, dstart)] are common prefix.
+* [recs[i] for i in range(dstart, dend + 1 - dstart)] are interesting
+* lines */
long dstart, dend;
+
+   /* pointer to records (lines) */
xrecord_t **recs;
+
+   /* record changed, use original "recs" index
+* rchg[i] can be either 0 or 1. 1 means recs[i] (line i) is marked
+* "changed". */
char *rchg;
+
+   /* cleaned-up record index => original "recs" index
+* clean-up means:
+*  rule 1. remove common prefix and suffix
+*  rule 2. remove records that are only on one side, since they can
+*  not match the other side
+* rindex[0] is likely dstart, if not removed by rule 2.
+* rindex[nreff - 1] is likely dend, if not removed by rule 2.
+*/
long *rindex;
+
+   /* rindex size */
long nreff;
+
+   /* cleaned-up record index => hash value
+* ha[i] = recs[rindex[i]]->ha */
unsigned long *ha;
 } xdfile_t;
 



To: quark, #hg-reviewers
Cc: mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


Re: [PATCH] xdiff: fix trivial build warnings on Windows

2018-03-09 Thread Jun Wu
Excerpts from Yuya Nishihara's message of 2018-03-08 21:33:42 +0900:
> On Tue, 6 Mar 2018 19:12:26 -0800, Jun Wu wrote:
> > Yeah, xdiff needs a migration from using "long", "int"s to "size_t" etc.
> > The git community has chosen to disallow diff >1GB files because of the
> > overflow concern [1].
> > 
> > [1]: 
> > https://github.com/git/git/commit/dcd1742e56ebb944c4ff62346da4548e1e3be675
> 
> So, should we queue this now or leave warnings to denote things that should
> be cleaned up?

I think the ideal solution would be replacing all "long"s with one of
"int64_t", "ssize_t", or "size_t", instead of casting around.

I can take a look at the actual change, since I think I have some knowledge
about xdiff internals now.
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2686: xdiff: add a preprocessing step that trims files

2018-03-09 Thread quark (Jun Wu)
quark added inline comments.

INLINE COMMENTS

> indygreg wrote in xprepare.c:169
> Bonus points if you resubmit this with more expressive variable names. Just 
> because xdiff's code is almost impossible to read doesn't mean we should 
> follow suit :)

The style guide in the git community recommends following whatever style the 
existing code base uses. I think we actually do that too, since new methods 
are not using `foo_bar` naming.

I'll add comments instead.

> indygreg wrote in xprepare.c:183-193
> I'm still showing this as a hot point in the code when compiling with default 
> settings used by Python packaging tools. I suspect we can get better results 
> on typical compiler flags by tweaking things a bit. But we can do that after 
> this lands.

Yes. It's expected.

I did try various ways to optimize it before sending the patch, including:

- Like `memchr`, test 8 bytes at once. Difficulty: memory alignment is not 
guaranteed (ex. `msmall.ptr % 8 != mlarge.ptr % 8`).
- Use various SIMD related compiler flags.

The first makes things slower, even if I did tell the compiler "pretend the 
memory to be aligned". The second makes no difference.

> indygreg wrote in xprepare.c:199-202
> This is clever. But `memrchr()` will be easier to read. Plus I suspect it 
> will be faster.
> 
> If you disagree, let's compromise at:
> 
>   i = 0;
>   while (i <= reserved) {
>  pp1--;
>  i += (*pp1 == '\n');
>   }
> 
> There's no sense using a `for` without the 3rd parameter IMO.

I think readability of the current code is better, since the memrchr version 
needs a "size" parameter, which is a burden to the existing logic.

I did some research before sending this patch. The glibc memchr is basically 
relying on `maybe_contain_zero_byte` that can test 8 bytes at once. But CPU 
SIMD instructions are faster than that trick.

The following code counts "\n"s in a file, using 3 ways: naive loop, testing 8 
bytes at once, and actually using memchr. See the benchmark at the end.

  #include <fcntl.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/stat.h>
  #include <sys/types.h>
  #include <unistd.h>
  
  /* Input buffer that main() fills with a single read() call; declared
   * 16-byte aligned through the compiler's "aligned" attribute. */
  char buf[6400] __attribute__ ((aligned (16)));
  /* Result of that read() call as stored by main(); the counting functions
   * treat it as the number of valid bytes in buf. */
  int size;
  
  /* Count the '\n' bytes in buf[0..size) with a plain byte-at-a-time loop.
   * Returns 0 when size is not positive. */
  static int count_naive() {
    int count = 0;
    for (int i = 0; i < size; ++i) {
      /* the comparison yields 0 or 1, so it can be added directly */
      count += buf[i] == '\n';
    }
    return count;
  }
  
  /* Count the '\n' bytes in buf[0..size) by calling memchr() repeatedly.
   * Returns 0 when size is not positive. */
  static int count_memchr() {
    int count = 0;
    if (size <= 0) {
      return 0;
    }
    const char *end = buf + size;
    /* Begin at buf itself so a newline in the very first byte is found. */
    const char *p = memchr(buf, '\n', (size_t) size);
    while (p != NULL) {
      /* count only actual hits; a NULL result ends the loop uncounted */
      count++;
      /* resume just past the hit, searching only the bytes left before end */
      p++;
      p = memchr(p, '\n', (size_t) (end - p));
    }
    return count;
  }
  
  /* Return non-zero when the 64-bit word x looks like it holds a zero byte,
   * 0 otherwise. The check adds MAGIC_BITS to x and inspects the bits that
   * are clear in MAGIC_BITS to see whether they changed.
   * NOTE(review): the name suggests false positives are possible; callers
   * should re-check the individual bytes, as count_u64 does. */
  static inline int maybe_contain_zero_byte(uint64_t x) {
    // See https://github.com/lattera/glibc/blob/master/string/memchr.c
    const uint64_t MAGIC_BITS = 0x7efefefefefefeff;
    return ((((x + MAGIC_BITS) ^ ~x) & ~MAGIC_BITS) != 0);
  }
  
  static int count_u64() {
uint64_t *p = (uint64_t *)
uint64_t x = '\n' + ('\n' << 8);
int count = 0;
x |= x << 16;
x |= x << 32;
for (int i = 0; i < size / 8; ++i, ++p) {
  uint64_t v = *p ^ x;
  if (maybe_contain_zero_byte(v)) {
const char *c = (const char *) p;
for (int j = 0; j < 8; ++j) {
  count += (((v >> (8 * j)) & 0xff) == 0);
}
  }
}
return count;
  }
  
  /* Benchmark driver.
   * argv[1]: file to load; at most sizeof buf bytes are read, in one read().
   * argv[2]: optional method selector, chosen by its first character:
   *          'n' = count_naive, 'm' = count_memchr, anything else or
   *          absent = count_u64.
   * Prints the newline count and returns 0; returns 1 when the file
   * argument is missing or the file cannot be opened or read. */
  int main(int argc, char const *argv[]) {
    if (argc < 2) {
      fprintf(stderr, "usage: %s FILE [naive|memchr|u64]\n",
              argc > 0 ? argv[0] : "a.out");
      return 1;
    }
    int fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
      perror("open");
      return 1;
    }
    ssize_t nread = read(fd, buf, sizeof buf);
    if (nread < 0) {
      /* report before close() so errno still belongs to read() */
      perror("read");
      close(fd);
      return 1;
    }
    close(fd);
    size = (int) nread;
    /* argc >= 2 here, so argv[2] is either a valid string or NULL */
    if (argv[2] && argv[2][0] == 'n') {
      printf("naive:  %d\n", count_naive());
    } else if (argv[2] && argv[2][0] == 'm') {
      printf("memchr: %d\n", count_memchr());
    } else {
      printf("u64:%d\n", count_u64());
    }
    return 0;
  }
  
  /*
  # gcc 7.3.0
  gcc -O2 a.c -o ao2
  gcc -O3 -mavx2 a.c -o ao3
  
  # best of 50 runs, wall time
  # test case: random data
  # head -c 6400 /dev/urandom > /tmp/r 
  ./ao2 naive  0.069
  ./ao2 u64    0.043
  ./ao2 memchr 0.039
  ./ao3 naive  0.038  # best
  ./ao3 u64    0.043
  ./ao3 memchr 0.039
  
  # test case: real code
  # v=read('/home/quark/hg-committed/mercurial/commands.py')
  # write('/tmp/c', v * (6400/len(v)))
  ./ao2 naive  0.069
  ./ao2 u64    0.059
  ./ao2 memchr 0.055
  ./ao3 naive  0.038  # best
  ./ao3 u64    0.055
  ./ao3 memchr 0.055  # slower
  
  # ruby script to run the tests
  path = ARGV[0]
  %w[./ao2 ./ao3].product(%w[naive u64 memchr]).each do |exe, name|
time = 50.times.map do
  t1 = Time.now
  system exe, path, name, 1=>'/dev/null'
  Time.now - t1
end.min
puts "#{exe} #{name.ljust(6)} #{time.round(3)}"
  end
  */

So I'd like to keep it simple and avoid over-optimization. After all, this is 
O(100)-ish, assuming line length won't be ridiculously long. Even if memchr is 
faster by 14%, it won't be noticeable, not to mention that it's 31% slower in 
the -O3 case.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2686

To: quark, #hg-reviewers, indygreg
Cc: indygreg, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org

D2686: xdiff: add a preprocessing step that trims files

2018-03-07 Thread quark (Jun Wu)
quark updated this revision to Diff 6708.
quark edited the summary of this revision.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2686?vs=6664&id=6708

REVISION DETAIL
  https://phab.mercurial-scm.org/D2686

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xprepare.c
  mercurial/thirdparty/xdiff/xtypes.h

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xtypes.h 
b/mercurial/thirdparty/xdiff/xtypes.h
--- a/mercurial/thirdparty/xdiff/xtypes.h
+++ b/mercurial/thirdparty/xdiff/xtypes.h
@@ -60,6 +60,7 @@
 
 typedef struct s_xdfenv {
xdfile_t xdf1, xdf2;
+   long nprefix, nsuffix;
 } xdfenv_t;
 
 
diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -156,6 +156,78 @@
 }
 
 
+/*
+ * Trim common prefix from files.
+ *
+ * Note: trimming could have side effects on hunk shifting, but the performance
+ * benefit outweighs the possible shifting change.
+ */
+static void xdl_trim_files(mmfile_t *mf1, mmfile_t *mf2, long reserved,
+   xdfenv_t *xe, mmfile_t *out_mf1, mmfile_t *out_mf2) {
+   mmfile_t msmall, mlarge;
+   long plines = 0, pbytes = 0, slines = 0, sbytes = 0, i;
+   const char *pp1, *pp2, *ps1, *ps2;
+
+   /* reserved must >= 0 for the line boundary adjustment to work */
+   if (reserved < 0)
+   reserved = 0;
+
+   if (mf1->size < mf2->size) {
+   memcpy(&msmall, mf1, sizeof(mmfile_t));
+   memcpy(&mlarge, mf2, sizeof(mmfile_t));
+   } else {
+   memcpy(&msmall, mf2, sizeof(mmfile_t));
+   memcpy(&mlarge, mf1, sizeof(mmfile_t));
+   }
+
+   pp1 = msmall.ptr, pp2 = mlarge.ptr;
+   for (i = 0; i < msmall.size && *pp1 == *pp2; ++i) {
+   plines += (*pp1 == '\n');
+   pp1++, pp2++;
+   }
+
+   ps1 = msmall.ptr + msmall.size - 1, ps2 = mlarge.ptr + mlarge.size - 1;
+   for (; ps1 > pp1 && *ps1 == *ps2; ++i) {
+   slines += (*ps1 == '\n');
+   ps1--, ps2--;
+   }
+
+   /* Retract common prefix and suffix boundaries for reserved lines */
+   if (plines <= reserved + 1) {
+   plines = 0;
+   } else {
+   for (i = 0; i <= reserved;) {
+   pp1--;
+   i += (*pp1 == '\n');
+   }
+   /* The new mmfile starts at the next char just after '\n' */
+   pbytes = pp1 - msmall.ptr + 1;
+   plines -= reserved;
+   }
+
+   if (slines <= reserved + 1) {
+   slines = 0;
+   } else {
+   for (i = 0; i <= reserved;) {
+   ps1++;
+   i += (*ps1 == '\n');
+   }
+   /* The new mmfile includes this '\n' */
+   sbytes = msmall.ptr + msmall.size - ps1 - 1;
+   slines -= reserved;
+   if (msmall.ptr[msmall.size - 1] == '\n')
+   slines -= 1;
+   }
+
+   xe->nprefix = plines;
+   xe->nsuffix = slines;
+   out_mf1->ptr = mf1->ptr + pbytes;
+   out_mf1->size = mf1->size - pbytes - sbytes;
+   out_mf2->ptr = mf2->ptr + pbytes;
+   out_mf2->size = mf2->size - pbytes - sbytes;
+}
+
+
 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, 
xpparam_t const *xpp,
   xdlclassifier_t *cf, xdfile_t *xdf) {
unsigned int hbits;
@@ -254,10 +326,13 @@
xdl_cha_free(&xdf->rcha);
 }
 
+/* Reserved lines for trimming, to leave room for shifting */
+#define TRIM_RESERVED_LINES 100
 
 int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
xdfenv_t *xe) {
long enl1, enl2, sample;
+   mmfile_t tmf1, tmf2;
xdlclassifier_t cf;
 
memset(&cf, 0, sizeof(cf));
@@ -270,12 +345,14 @@
if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
return -1;
 
-   if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
+   xdl_trim_files(mf1, mf2, TRIM_RESERVED_LINES, xe, &tmf1, &tmf2);
+
+   if (xdl_prepare_ctx(1, &tmf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
 
xdl_free_classifier(&cf);
return -1;
}
-   if (xdl_prepare_ctx(2, mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
+   if (xdl_prepare_ctx(2, &tmf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
 
xdl_free_ctx(&xe->xdf1);
xdl_free_classifier(&cf);
diff --git a/mercurial/thirdparty/xdiff/xdiffi.c 
b/mercurial/thirdparty/xdiff/xdiffi.c
--- a/mercurial/thirdparty/xdiff/xdiffi.c
+++ b/mercurial/thirdparty/xdiff/xdiffi.c
@@ -1062,6 +1062,7 @@
 static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
  xdemitconf_t const *xecfg)
 {
+   long p = xe->nprefix, s = xe->nsuffix;
xdchange_t *xch, *xche;
 
if (!xecfg->hunk_func)
@@ -1073,23 

  1   2   3   4   5   6   7   8   9   10   >