ryanmce created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  They are greedy algorithms that yields suboptimal results. Patience diff has
  been advertised as "slower, but generating better results sometimes" for a
  long time. But it's in theory "faster if there are common prefix/suffix
  and/or unique lines, could generate suboptimal results". As demonstrated by
  this test case:
  
    open('a', 'w').write('\n'.join(list('a' + 'x' * 300 + 'u' + 'x' * 700 + 
'a\n')))
    open('b', 'w').write('\n'.join(list('b' + 'x' * 700 + 'u' + 'x' * 300 + 
'b\n')))
  
  For generating "better" results, the diff sliding heuristic [1] is a better
  solution in general.
  
  So let's just remove patience diff and its variant histogram diff to
  simplify the code.
  
  [1]: 
https://github.com/git/git/commit/433860f3d0beb0c6f205290bd16cda413148f098

TEST PLAN
  `gcc -fPIC *.c --shared -o xdiff.so` still builds.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2573

AFFECTED FILES
  mercurial/thirdparty/xdiff/xdiff.h
  mercurial/thirdparty/xdiff/xdiffi.c
  mercurial/thirdparty/xdiff/xprepare.c

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xprepare.c 
b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -27,7 +27,6 @@
 #define XDL_MAX_EQLIMIT 1024
 #define XDL_SIMSCAN_WINDOW 100
 #define XDL_GUESS_NLINES1 256
-#define XDL_GUESS_NLINES2 20
 
 
 typedef struct s_xdlclass {
@@ -181,9 +180,7 @@
        if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *))))
                goto abort;
 
-       if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
-               hbits = hsize = 0;
-       else {
+       {
                hbits = xdl_hashbits((unsigned int) narec);
                hsize = 1 << hbits;
                if (!(rhash = (xrecord_t **) xdl_malloc(hsize * 
sizeof(xrecord_t *))))
@@ -209,8 +206,7 @@
                        crec->ha = hav;
                        recs[nrec++] = crec;
 
-                       if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
-                           xdl_classify_record(pass, cf, rhash, hbits, crec) < 
0)
+                       if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 
0)
                                goto abort;
                }
        }
@@ -266,21 +262,12 @@
 
        memset(&cf, 0, sizeof(cf));
 
-       /*
-        * For histogram diff, we can afford a smaller sample size and
-        * thus a poorer estimate of the number of lines, as the hash
-        * table (rhash) won't be filled up/grown. The number of lines
-        * (nrecs) will be updated correctly anyway by
-        * xdl_prepare_ctx().
-        */
-       sample = (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF
-                 ? XDL_GUESS_NLINES2 : XDL_GUESS_NLINES1);
+       sample = XDL_GUESS_NLINES1;
 
        enl1 = xdl_guess_lines(mf1, sample) + 1;
        enl2 = xdl_guess_lines(mf2, sample) + 1;
 
-       if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF &&
-           xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
+       if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
                return -1;
 
        if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
@@ -295,18 +282,14 @@
                return -1;
        }
 
-       if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
-           (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
-           xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
-
+       if (xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
                xdl_free_ctx(&xe->xdf2);
                xdl_free_ctx(&xe->xdf1);
                xdl_free_classifier(&cf);
                return -1;
        }
 
-       if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)
-               xdl_free_classifier(&cf);
+       xdl_free_classifier(&cf);
 
        return 0;
 }
diff --git a/mercurial/thirdparty/xdiff/xdiffi.c 
b/mercurial/thirdparty/xdiff/xdiffi.c
--- a/mercurial/thirdparty/xdiff/xdiffi.c
+++ b/mercurial/thirdparty/xdiff/xdiffi.c
@@ -328,12 +328,6 @@
        xdalgoenv_t xenv;
        diffdata_t dd1, dd2;
 
-       if (XDF_DIFF_ALG(xpp->flags) == XDF_PATIENCE_DIFF)
-               return xdl_do_patience_diff(mf1, mf2, xpp, xe);
-
-       if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
-               return xdl_do_histogram_diff(mf1, mf2, xpp, xe);
-
        if (xdl_prepare_env(mf1, mf2, xpp, xe) < 0) {
 
                return -1;
diff --git a/mercurial/thirdparty/xdiff/xdiff.h 
b/mercurial/thirdparty/xdiff/xdiff.h
--- a/mercurial/thirdparty/xdiff/xdiff.h
+++ b/mercurial/thirdparty/xdiff/xdiff.h
@@ -27,6 +27,8 @@
 extern "C" {
 #endif /* #ifdef __cplusplus */
 
+#include <stddef.h> /* size_t */
+
 /* xpparm_t.flags */
 #define XDF_NEED_MINIMAL (1 << 0)
 
@@ -41,11 +43,6 @@
 
 #define XDF_IGNORE_BLANK_LINES (1 << 7)
 
-#define XDF_PATIENCE_DIFF (1 << 14)
-#define XDF_HISTOGRAM_DIFF (1 << 15)
-#define XDF_DIFF_ALGORITHM_MASK (XDF_PATIENCE_DIFF | XDF_HISTOGRAM_DIFF)
-#define XDF_DIFF_ALG(x) ((x) & XDF_DIFF_ALGORITHM_MASK)
-
 #define XDF_INDENT_HEURISTIC (1 << 23)
 
 /* xdemitconf_t.flags */



To: ryanmce, #hg-reviewers
Cc: mercurial-devel
_______________________________________________
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Reply via email to