D2573: xdiff: remove patience and histogram diff algorithms

2018-03-03 Thread quark (Jun Wu)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG3ee9ca23dad0: xdiff: remove patience and histogram diff 
algorithms (authored by quark, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2573?vs=6473&id=6512

REVISION DETAIL
  https://phab.mercurial-scm.org/D2573

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiff.h
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xhistogram.c
  mercurial/thirdparty/xdiff/xpatience.c
  mercurial/thirdparty/xdiff/xprepare.c

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -27,7 +27,6 @@
 #define XDL_MAX_EQLIMIT 1024
 #define XDL_SIMSCAN_WINDOW 100
 #define XDL_GUESS_NLINES1 256
-#define XDL_GUESS_NLINES2 20
 
 
 typedef struct s_xdlclass {
@@ -181,9 +180,7 @@
if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *
goto abort;
 
-   if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
-   hbits = hsize = 0;
-   else {
+   {
hbits = xdl_hashbits((unsigned int) narec);
hsize = 1 << hbits;
if (!(rhash = (xrecord_t **) xdl_malloc(hsize * 
sizeof(xrecord_t *
@@ -209,8 +206,7 @@
crec->ha = hav;
recs[nrec++] = crec;
 
-   if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
-   xdl_classify_record(pass, cf, rhash, hbits, crec) < 
0)
+   if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 
0)
goto abort;
}
}
@@ -266,21 +262,12 @@
 
memset(&cf, 0, sizeof(cf));
 
-   /*
-* For histogram diff, we can afford a smaller sample size and
-* thus a poorer estimate of the number of lines, as the hash
-* table (rhash) won't be filled up/grown. The number of lines
-* (nrecs) will be updated correctly anyway by
-* xdl_prepare_ctx().
-*/
-   sample = (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF
- ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1);
+   sample = XDL_GUESS_NLINES1;
 
enl1 = xdl_guess_lines(mf1, sample) + 1;
enl2 = xdl_guess_lines(mf2, sample) + 1;
 
-   if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF &&
-   xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
+   if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
return -1;
 
if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
@@ -295,18 +282,14 @@
return -1;
}
 
-   if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
-   (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
-   xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
-
+   if (xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
xdl_free_ctx(&xe->xdf2);
xdl_free_ctx(&xe->xdf1);
xdl_free_classifier(&cf);
return -1;
}
 
-   if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)
-   xdl_free_classifier(&cf);
+   xdl_free_classifier(&cf);
 
return 0;
 }
diff --git a/mercurial/thirdparty/xdiff/xpatience.c 
b/mercurial/thirdparty/xdiff/xpatience.c
deleted file mode 100644
--- a/mercurial/thirdparty/xdiff/xpatience.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- *  LibXDiff by Davide Libenzi ( File Differential Library )
- *  Copyright (C) 2003-2016 Davide Libenzi, Johannes E. Schindelin
- *
- *  This library is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU Lesser General Public
- *  License as published by the Free Software Foundation; either
- *  version 2.1 of the License, or (at your option) any later version.
- *
- *  This library is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  Lesser General Public License for more details.
- *
- *  You should have received a copy of the GNU Lesser General Public
- *  License along with this library; if not, see
- *  .
- *
- *  Davide Libenzi 
- *
- */
-#include "xinclude.h"
-#include "xtypes.h"
-#include "xdiff.h"
-
-/*
- * The basic idea of patience diff is to find lines that are unique in
- * both files.  These are intuitively the ones that we want to see as
- * common lines.
- *
- * The maximal ordered sequence of such line pairs (where ordered means
- * that the order in the sequence agrees with the order of the lines in
- * both files) naturally defines an initial set of common lines.
- *
- * Now, the algorithm tries to extend the set of common lines by growing
- * the line ra

D2573: xdiff: remove patience and histogram diff algorithms

2018-03-03 Thread quark (Jun Wu)
quark added a comment.


  Googling "patience diff", most results will say it is slower but has better 
diff quality sometimes.
  
  That is very misleading - the algorithm adds greediness (i.e. incorrectness) 
and is in theory faster in some cases. The "better" quality is also untrue 
comparing to indent heuristic.
  
  Technically, I don't think it's worthwhile to keep - definitely worse quality 
and unpredictable performance changes comparing to default diff + indent 
heuristic setup.
  
  From a non-technical point of view, I think educating people that they cannot 
be misled is very important. This one alone is an enough reason to do the 
cleanup, imo.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2573

To: ryanmce, #hg-reviewers, indygreg
Cc: quark, indygreg, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2573: xdiff: remove patience and histogram diff algorithms

2018-03-03 Thread indygreg (Gregory Szorc)
indygreg accepted this revision as: indygreg.
indygreg added a comment.


  I'm leaning towards keeping these algorithms so we can expose them as 
alternate implementations in the future. It will also making syncing code from 
upstream easier. But removing the unused-by-us code is fine.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2573

To: ryanmce, #hg-reviewers, indygreg
Cc: indygreg, mercurial-devel
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel


D2573: xdiff: remove patience and histogram diff algorithms

2018-03-03 Thread quark (Jun Wu)
quark updated this revision to Diff 6473.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2573?vs=6456&id=6473

REVISION DETAIL
  https://phab.mercurial-scm.org/D2573

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiff.h
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xhistogram.c
  mercurial/thirdparty/xdiff/xpatience.c
  mercurial/thirdparty/xdiff/xprepare.c

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -27,7 +27,6 @@
 #define XDL_MAX_EQLIMIT 1024
 #define XDL_SIMSCAN_WINDOW 100
 #define XDL_GUESS_NLINES1 256
-#define XDL_GUESS_NLINES2 20
 
 
 typedef struct s_xdlclass {
@@ -181,9 +180,7 @@
if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *
goto abort;
 
-   if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
-   hbits = hsize = 0;
-   else {
+   {
hbits = xdl_hashbits((unsigned int) narec);
hsize = 1 << hbits;
if (!(rhash = (xrecord_t **) xdl_malloc(hsize * 
sizeof(xrecord_t *
@@ -209,8 +206,7 @@
crec->ha = hav;
recs[nrec++] = crec;
 
-   if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
-   xdl_classify_record(pass, cf, rhash, hbits, crec) < 
0)
+   if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 
0)
goto abort;
}
}
@@ -266,21 +262,12 @@
 
memset(&cf, 0, sizeof(cf));
 
-   /*
-* For histogram diff, we can afford a smaller sample size and
-* thus a poorer estimate of the number of lines, as the hash
-* table (rhash) won't be filled up/grown. The number of lines
-* (nrecs) will be updated correctly anyway by
-* xdl_prepare_ctx().
-*/
-   sample = (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF
- ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1);
+   sample = XDL_GUESS_NLINES1;
 
enl1 = xdl_guess_lines(mf1, sample) + 1;
enl2 = xdl_guess_lines(mf2, sample) + 1;
 
-   if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF &&
-   xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
+   if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
return -1;
 
if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
@@ -295,18 +282,14 @@
return -1;
}
 
-   if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
-   (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
-   xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
-
+   if (xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
xdl_free_ctx(&xe->xdf2);
xdl_free_ctx(&xe->xdf1);
xdl_free_classifier(&cf);
return -1;
}
 
-   if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)
-   xdl_free_classifier(&cf);
+   xdl_free_classifier(&cf);
 
return 0;
 }
diff --git a/mercurial/thirdparty/xdiff/xpatience.c 
b/mercurial/thirdparty/xdiff/xpatience.c
deleted file mode 100644
--- a/mercurial/thirdparty/xdiff/xpatience.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- *  LibXDiff by Davide Libenzi ( File Differential Library )
- *  Copyright (C) 2003-2016 Davide Libenzi, Johannes E. Schindelin
- *
- *  This library is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU Lesser General Public
- *  License as published by the Free Software Foundation; either
- *  version 2.1 of the License, or (at your option) any later version.
- *
- *  This library is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  Lesser General Public License for more details.
- *
- *  You should have received a copy of the GNU Lesser General Public
- *  License along with this library; if not, see
- *  .
- *
- *  Davide Libenzi 
- *
- */
-#include "xinclude.h"
-#include "xtypes.h"
-#include "xdiff.h"
-
-/*
- * The basic idea of patience diff is to find lines that are unique in
- * both files.  These are intuitively the ones that we want to see as
- * common lines.
- *
- * The maximal ordered sequence of such line pairs (where ordered means
- * that the order in the sequence agrees with the order of the lines in
- * both files) naturally defines an initial set of common lines.
- *
- * Now, the algorithm tries to extend the set of common lines by growing
- * the line ranges where the files have identical lines.
- *
- * Between those common lines, the patience diff algorithm is applied
- * recursively, until no unique line 

D2573: xdiff: remove patience and histogram diff algorithms

2018-03-03 Thread quark (Jun Wu)
quark updated this revision to Diff 6456.
quark edited the summary of this revision.
quark retitled this revision from "xdiff: remove patience and histogram diffs" 
to "xdiff: remove patience and histogram diff algorithms".

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2573?vs=6416&id=6456

REVISION DETAIL
  https://phab.mercurial-scm.org/D2573

AFFECTED FILES
  mercurial/third-party/xdiff/xdiff.h
  mercurial/third-party/xdiff/xdiffi.c
  mercurial/third-party/xdiff/xhistogram.c
  mercurial/third-party/xdiff/xpatience.c
  mercurial/third-party/xdiff/xprepare.c

CHANGE DETAILS

diff --git a/mercurial/third-party/xdiff/xprepare.c 
b/mercurial/third-party/xdiff/xprepare.c
--- a/mercurial/third-party/xdiff/xprepare.c
+++ b/mercurial/third-party/xdiff/xprepare.c
@@ -27,7 +27,6 @@
 #define XDL_MAX_EQLIMIT 1024
 #define XDL_SIMSCAN_WINDOW 100
 #define XDL_GUESS_NLINES1 256
-#define XDL_GUESS_NLINES2 20
 
 
 typedef struct s_xdlclass {
@@ -181,9 +180,7 @@
if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *
goto abort;
 
-   if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
-   hbits = hsize = 0;
-   else {
+   {
hbits = xdl_hashbits((unsigned int) narec);
hsize = 1 << hbits;
if (!(rhash = (xrecord_t **) xdl_malloc(hsize * 
sizeof(xrecord_t *
@@ -209,8 +206,7 @@
crec->ha = hav;
recs[nrec++] = crec;
 
-   if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
-   xdl_classify_record(pass, cf, rhash, hbits, crec) < 
0)
+   if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 
0)
goto abort;
}
}
@@ -266,21 +262,12 @@
 
memset(&cf, 0, sizeof(cf));
 
-   /*
-* For histogram diff, we can afford a smaller sample size and
-* thus a poorer estimate of the number of lines, as the hash
-* table (rhash) won't be filled up/grown. The number of lines
-* (nrecs) will be updated correctly anyway by
-* xdl_prepare_ctx().
-*/
-   sample = (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF
- ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1);
+   sample = XDL_GUESS_NLINES1;
 
enl1 = xdl_guess_lines(mf1, sample) + 1;
enl2 = xdl_guess_lines(mf2, sample) + 1;
 
-   if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF &&
-   xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
+   if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
return -1;
 
if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
@@ -295,18 +282,14 @@
return -1;
}
 
-   if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
-   (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
-   xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
-
+   if (xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
xdl_free_ctx(&xe->xdf2);
xdl_free_ctx(&xe->xdf1);
xdl_free_classifier(&cf);
return -1;
}
 
-   if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)
-   xdl_free_classifier(&cf);
+   xdl_free_classifier(&cf);
 
return 0;
 }
diff --git a/mercurial/third-party/xdiff/xpatience.c 
b/mercurial/third-party/xdiff/xpatience.c
deleted file mode 100644
--- a/mercurial/third-party/xdiff/xpatience.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- *  LibXDiff by Davide Libenzi ( File Differential Library )
- *  Copyright (C) 2003-2016 Davide Libenzi, Johannes E. Schindelin
- *
- *  This library is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU Lesser General Public
- *  License as published by the Free Software Foundation; either
- *  version 2.1 of the License, or (at your option) any later version.
- *
- *  This library is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  Lesser General Public License for more details.
- *
- *  You should have received a copy of the GNU Lesser General Public
- *  License along with this library; if not, see
- *  .
- *
- *  Davide Libenzi 
- *
- */
-#include "xinclude.h"
-#include "xtypes.h"
-#include "xdiff.h"
-
-/*
- * The basic idea of patience diff is to find lines that are unique in
- * both files.  These are intuitively the ones that we want to see as
- * common lines.
- *
- * The maximal ordered sequence of such line pairs (where ordered means
- * that the order in the sequence agrees with the order of the lines in
- * both files) naturally defines an initial set of common lines.
- *
- * Now, the algorithm tries to extend the set of c