quark created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Before running the main diff algorithm, xdiff will "prepare" the contexts
  for both files. That includes splitting, hashing all the lines, and building
  hash tables for those lines. Building the hash tables can be expensive,
  so this patch moves that step out of xdl_prepare_ctx into a separate
  function (xdl_prepare_hashtable) so that it can be optimized separately.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2630

AFFECTED FILES
  mercurial/thirdparty/xdiff/xprepare.c

CHANGE DETAILS

diff --git a/mercurial/thirdparty/xdiff/xprepare.c b/mercurial/thirdparty/xdiff/xprepare.c
--- a/mercurial/thirdparty/xdiff/xprepare.c
+++ b/mercurial/thirdparty/xdiff/xprepare.c
@@ -157,36 +157,25 @@
 
 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
                           xdlclassifier_t *cf, xdfile_t *xdf) {
-       unsigned int hbits;
-       long nrec, hsize, bsize;
+       long nrec, bsize;
        unsigned long hav;
        char const *blk, *cur, *top, *prev;
        xrecord_t *crec;
        xrecord_t **recs, **rrecs;
-       xrecord_t **rhash;
        unsigned long *ha;
        char *rchg;
        long *rindex;
 
        ha = NULL;
        rindex = NULL;
        rchg = NULL;
-       rhash = NULL;
        recs = NULL;
 
        if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0)
                goto abort;
        if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *))))
                goto abort;
 
-       {
-               hbits = xdl_hashbits((unsigned int) narec);
-               hsize = 1 << hbits;
-               if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
-                       goto abort;
-               memset(rhash, 0, hsize * sizeof(xrecord_t *));
-       }
-
        nrec = 0;
        if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) {
                for (top = blk + bsize; cur < top; ) {
@@ -204,9 +193,6 @@
                        crec->size = (long) (cur - prev);
                        crec->ha = hav;
                        recs[nrec++] = crec;
-
-                       if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
-                               goto abort;
                }
        }
 
@@ -221,27 +207,60 @@
 
        xdf->nrec = nrec;
        xdf->recs = recs;
-       xdf->hbits = hbits;
-       xdf->rhash = rhash;
        xdf->rchg = rchg + 1;
        xdf->rindex = rindex;
        xdf->nreff = 0;
        xdf->ha = ha;
        xdf->dstart = 0;
        xdf->dend = nrec - 1;
 
+       /* use xdl_prepare_hashtable to set them */
+       xdf->hbits = 0;
+       xdf->rhash = NULL;
+
        return 0;
 
 abort:
        xdl_free(ha);
        xdl_free(rindex);
        xdl_free(rchg);
-       xdl_free(rhash);
        xdl_free(recs);
        xdl_cha_free(&xdf->rcha);
        return -1;
 }
 
+/*
+ * Adjust hash values for records (lines) in a file so the hash values become
+ * unique. This makes later comparisons faster: later code can simply compare
+ * "ha" values instead of comparing line content.
+ */
+static int xdl_prepare_hashtable(unsigned int pass, mmfile_t *mf,
+               xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) {
+       xrecord_t **rhash = NULL;
+       long nrec = xdf->nrec;
+       unsigned int hbits;
+       long hsize;
+       long i;
+
+       hbits = xdl_hashbits((unsigned int) nrec);
+       hsize = 1 << hbits;
+       if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
+               goto abort;
+       memset(rhash, 0, hsize * sizeof(xrecord_t *));
+
+       for (i = 0; i < nrec; ++i) {
+               if (xdl_classify_record(pass, cf, rhash, hbits, xdf->recs[i]) < 0)
+                       goto abort;
+       }
+
+       xdf->hbits = hbits;
+       xdf->rhash = rhash;
+
+       return 0;
+abort:
+       xdl_free(rhash);
+       return -1;
+}
 
 static void xdl_free_ctx(xdfile_t *xdf) {
 
@@ -288,6 +307,19 @@
                return -1;
        }
 
+       if (xdl_prepare_hashtable(1, mf1, xpp, &cf, &xe->xdf1) < 0) {
+               xdl_free_ctx(&xe->xdf1);
+               xdl_free_ctx(&xe->xdf2);
+               xdl_free_classifier(&cf);
+               return -1;
+       }
+       if (xdl_prepare_hashtable(2, mf2, xpp, &cf, &xe->xdf2) < 0) {
+               xdl_free_ctx(&xe->xdf1);
+               xdl_free_ctx(&xe->xdf2);
+               xdl_free_classifier(&cf);
+               return -1;
+       }
+
        if (xdl_cleanup_records(&cf, &xe->xdf1, &xe->xdf2) < 0) {
                xdl_free_ctx(&xe->xdf2);
                xdl_free_ctx(&xe->xdf1);



To: quark, #hg-reviewers
Cc: mercurial-devel
_______________________________________________
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Reply via email to