# HG changeset patch # User Boris Feld <boris.f...@octobus.net> # Date 1541785632 -3600 # Fri Nov 09 18:47:12 2018 +0100 # Node ID 4a1104eade1dfb1697517d60d2c5fd7a98b8c7f0 # Parent 0ea42453fa491793d1e145f5093b65e84cb65e97 # EXP-Topic sparse-perf # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 4a1104eade1d sparse-revlog: introduce native (C) implementation of slicechunktodensity
This is a C implementation of `_slicechunktodensity` in the `mercurial/revlogutils/deltas.py` file. The algorithm involves a lot of integer manipulation and low-level access to index data. Having a C implementation of it raises a large performance improvement. See later changeset in this series for details. diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c +++ b/mercurial/cext/revlog.c @@ -11,6 +11,7 @@ #include <assert.h> #include <ctype.h> #include <stddef.h> +#include <stdlib.h> #include <string.h> #include "bitmanipulation.h" @@ -1041,6 +1042,197 @@ static PyObject *_trimchunk(indexObject return chunk; } +struct Gap { + long size; + long idx; +}; + +static int gap_compare(const void *left, const void *right) +{ + return ((struct Gap *)left)->size - ((struct Gap *)right)->size; +} +static int long_compare(const void *left, const void *right) +{ + return *(long *)left - *(long *)right; +} + +static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args) +{ + /* method arguments */ + PyObject *list_revs = NULL; /* revisions in the chain */ + double targetdensity = 0.5; /* min density to achieve */ + long mingapsize = 0; /* threshold to ignore gaps */ + + /* other core variables */ + long i; /* used for various iteration */ + PyObject *result = NULL; /* the final return of the function */ + + /* generic information about the delta chain being slice */ + Py_ssize_t num_revs = 0; /* size of the full delta chain */ + Py_ssize_t *revs = NULL; /* native array of revision in the chain */ + long chainpayload = 0; /* sum of all delta in the chain */ + long deltachainspan = 0; /* distance from first byte to last byte */ + + /* variable used for slicing the delta chain */ + long readdata = 0; /* amount of data currently planned to be read */ + double density = 0; /* ration of payload data compared to read ones */ + struct Gap *gaps = NULL; /* array of notable gap in the chain */ + long num_gaps = 0; /* total number of notable gap recorded so far */ + Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */ + long num_selected = 0; /* number of gaps skipped */ + PyObject *chunk = NULL; /* individual slice */ + PyObject *allchunks = PyList_New(num_selected); /* all slices */ + + /* parsing argument */ + if (!PyArg_ParseTuple(args, "O!dl", &PyList_Type, &list_revs, + &targetdensity, &mingapsize)) { + goto bail; + } + + /* If the delta chain contains a single element, we do not need slicing + */ + num_revs = PyList_GET_SIZE(list_revs); + if (num_revs <= 1) { + result = PyTuple_Pack(1, list_revs); + goto done; + } + + /* Turn the python list into a native integer array (for efficiency) */ + revs = (Py_ssize_t *)malloc((num_revs) * sizeof(Py_ssize_t)); + if (revs == NULL) { + PyErr_NoMemory(); + goto bail; + } + for (i = 0; i < num_revs; i++) { + Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i)); + if (revnum == -1 && PyErr_Occurred()) { + goto bail; + } + revs[i] = revnum; + } + + /* Compute and check various property of the unsliced delta chain */ + deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]); + + if (deltachainspan <= mingapsize) { + result = PyTuple_Pack(1, list_revs); + goto done; + } + chainpayload = 0; + for (i = 0; i < num_revs; i++) { + chainpayload += index_get_length(self, revs[i]); + } + + readdata = deltachainspan; + density = 1.0; + + if (0 < deltachainspan) { + density = (double)chainpayload / (double)deltachainspan; + }; + + if (density >= targetdensity) { + result = PyTuple_Pack(1, list_revs); + goto done; + } + + /* if chain is too sparse, look for relevant gaps */ + gaps = (struct Gap *)malloc((num_revs) * sizeof(struct Gap)); + if (gaps == NULL) { + PyErr_NoMemory(); + goto bail; + } + + Py_ssize_t previous_end = -1; + for (i = 0; i < num_revs; i++) { + Py_ssize_t revstart = index_get_start(self, revs[i]); + Py_ssize_t revsize = index_get_length(self, revs[i]); + if (revsize <= 0) { + continue; + } + if (previous_end >= 0) { + Py_ssize_t gapsize = revstart - previous_end; + if (gapsize > mingapsize) { + gaps[num_gaps].size = gapsize; + gaps[num_gaps].idx = i; + num_gaps += 1; + } + } + previous_end = revstart + revsize; + } + if (num_gaps == 0) { + result = PyTuple_Pack(1, list_revs); + goto done; + } + qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare); + + /* Slice the largest gap first, they improve the density the most */ + selected_indices = + (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t)); + if (selected_indices == NULL) { + PyErr_NoMemory(); + goto bail; + } + + for (i = num_gaps - 1; i >= 0; i--) { + selected_indices[num_selected] = gaps[i].idx; + readdata -= gaps[i].size; + num_selected += 1; + if (readdata <= 0) { + density = 1.0; + } else { + density = (double)chainpayload / (double)readdata; + } + if (density >= targetdensity) { + break; + } + } + qsort(selected_indices, num_selected, sizeof(Py_ssize_t), + &long_compare); + + /* create the resulting slice */ + long previdx = 0; + selected_indices[num_selected] = num_revs; + for (i = 0; i <= num_selected; i++) { + long idx = selected_indices[i]; + chunk = _trimchunk(self, list_revs, previdx, idx); + if (chunk == NULL) { + goto bail; + } + if (PyList_GET_SIZE(chunk) > 0) { + if (PyList_Append(allchunks, chunk) == -1) { + goto bail; + } + } + Py_DECREF(chunk); + chunk = NULL; + previdx = idx; + } + result = allchunks; + goto done; + +bail: + if (allchunks != NULL) { + Py_DECREF(allchunks); + } + if (chunk != NULL) { + Py_DECREF(chunk); + } +done: + if (revs != NULL) { + free(revs); + } + if (gaps != NULL) { + free(gaps); + } + if (selected_indices != NULL) { + free(selected_indices); + } + if (result != NULL) { + Py_INCREF(result); + } + return result; +} + static inline int nt_level(const char *node, Py_ssize_t level) { int v = node[level >> 1]; @@ -2265,6 +2457,8 @@ static PyMethodDef index_methods[] = { "get filtered head revisions"}, /* Can always do filtering */ {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"}, + {"slicechunktodensity", (PyCFunction)index_slicechunktodensity, + METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"}, {"append", (PyCFunction)index_append, METH_O, "append an index entry"}, {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS, "match a potentially ambiguous node ID"}, _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel