quark created this revision. Herald added a subscriber: mercurial-devel. Herald added a reviewer: hg-reviewers.
REVISION SUMMARY Before running the main diff algorithm, xdiff will "prepare" the contexts for both files. That includes splitting, hashing all the lines, and building hash tables for those lines. The hash table building process could be expensive. Moving it out so it can be optimized separately. REPOSITORY rHG Mercurial REVISION DETAIL https://phab.mercurial-scm.org/D2630 AFFECTED FILES mercurial/thirdparty/xdiff/xprepare.c CHANGE DETAILS diff --git a/mercurial/thirdparty/xdiff/xprepare.c b/mercurial/thirdparty/xdiff/xprepare.c --- a/mercurial/thirdparty/xdiff/xprepare.c +++ b/mercurial/thirdparty/xdiff/xprepare.c @@ -157,36 +157,25 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) { - unsigned int hbits; - long nrec, hsize, bsize; + long nrec, bsize; unsigned long hav; char const *blk, *cur, *top, *prev; xrecord_t *crec; xrecord_t **recs, **rrecs; - xrecord_t **rhash; unsigned long *ha; char *rchg; long *rindex; ha = NULL; rindex = NULL; rchg = NULL; - rhash = NULL; recs = NULL; if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0) goto abort; if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *)))) goto abort; - { - hbits = xdl_hashbits((unsigned int) narec); - hsize = 1 << hbits; - if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) - goto abort; - memset(rhash, 0, hsize * sizeof(xrecord_t *)); - } - nrec = 0; if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) { for (top = blk + bsize; cur < top; ) { @@ -204,9 +193,6 @@ crec->size = (long) (cur - prev); crec->ha = hav; recs[nrec++] = crec; - - if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0) - goto abort; } } @@ -221,27 +207,60 @@ xdf->nrec = nrec; xdf->recs = recs; - xdf->hbits = hbits; - xdf->rhash = rhash; xdf->rchg = rchg + 1; xdf->rindex = rindex; xdf->nreff = 0; xdf->ha = ha; xdf->dstart = 0; xdf->dend = nrec - 1; + /* use xdl_prepare_hashtable to set them */ + xdf->hbits = 0; + xdf->rhash = NULL; + return 0; abort: xdl_free(ha); xdl_free(rindex); xdl_free(rchg); - xdl_free(rhash); xdl_free(recs); xdl_cha_free(&xdf->rcha); return -1; } +/* + * Adjust hash values for records (lines) in a file so the hash values become + * unique. This makes future calculation faster - they can just compare "ha" + * instead of comparing line content. + */ +static int xdl_prepare_hashtable(unsigned int pass, mmfile_t *mf, + xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) { + xrecord_t **rhash = NULL; + long nrec = xdf->nrec; + unsigned int hbits; + long hsize; + long i; + + hbits = xdl_hashbits((unsigned int) nrec); + hsize = 1 << hbits; + if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *)))) + goto abort; + memset(rhash, 0, hsize * sizeof(xrecord_t *)); + + for (i = 0; i < nrec; ++i) { + if (xdl_classify_record(pass, cf, rhash, hbits, xdf->recs[i]) < 0) + goto abort; + } + + xdf->hbits = hbits; + xdf->rhash = rhash; + + return 0; +abort: + xdl_free(rhash); + return -1; +} static void xdl_free_ctx(xdfile_t *xdf) { @@ -288,6 +307,19 @@ return -1; } + if (xdl_prepare_hashtable(1, mf1, xpp, &cf, &xe->xdf1) < 0) { + xdl_free_ctx(&xe->xdf1); + xdl_free_ctx(&xe->xdf2); + xdl_free_classifier(&cf); + return -1; + } + if (xdl_prepare_hashtable(2, mf2, xpp, &cf, &xe->xdf2) < 0) { + xdl_free_ctx(&xe->xdf1); + xdl_free_ctx(&xe->xdf2); + xdl_free_classifier(&cf); + return -1; + } + if (xdl_cleanup_records(&cf, &xe->xdf1, &xe->xdf2) < 0) { xdl_free_ctx(&xe->xdf2); xdl_free_ctx(&xe->xdf1); To: quark, #hg-reviewers Cc: mercurial-devel _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel