# HG changeset patch
# User Boris Feld <boris.f...@octobus.net>
# Date 1541785632 -3600
#      Fri Nov 09 18:47:12 2018 +0100
# Node ID 4a1104eade1dfb1697517d60d2c5fd7a98b8c7f0
# Parent  0ea42453fa491793d1e145f5093b65e84cb65e97
# EXP-Topic sparse-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 
4a1104eade1d
sparse-revlog: introduce native (C) implementation of slicechunktodensity

This is a C implementation of `_slicechunktodensity` in the
`mercurial/revlogutils/deltas.py` file.

The algorithm involves a lot of integer manipulation and low-level access to
index data. Having a C implementation of it raises a large performance
improvement. See later changeset in this series for details.

diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -11,6 +11,7 @@
 #include <assert.h>
 #include <ctype.h>
 #include <stddef.h>
+#include <stdlib.h>
 #include <string.h>
 
 #include "bitmanipulation.h"
@@ -1041,6 +1042,197 @@ static PyObject *_trimchunk(indexObject 
        return chunk;
 }
 
+struct Gap {
+       long size;
+       long idx;
+};
+
+static int gap_compare(const void *left, const void *right)
+{
+       return ((struct Gap *)left)->size - ((struct Gap *)right)->size;
+}
+static int long_compare(const void *left, const void *right)
+{
+       return *(long *)left - *(long *)right;
+}
+
+static PyObject *index_slicechunktodensity(indexObject *self, PyObject *args)
+{
+       /* method arguments */
+       PyObject *list_revs = NULL; /* revisions in the chain */
+       double targetdensity = 0.5; /* min density to achieve */
+       long mingapsize = 0;        /* threshold to ignore gaps */
+
+       /* other core variables */
+       long i;                  /* used for various iteration */
+       PyObject *result = NULL; /* the final return of the function */
+
+       /* generic information about the delta chain being slice */
+       Py_ssize_t num_revs = 0; /* size of the full delta chain */
+       Py_ssize_t *revs = NULL; /* native array of revision in the chain */
+       long chainpayload = 0;   /* sum of all delta in the chain */
+       long deltachainspan = 0; /* distance from first byte to last byte */
+
+       /* variable used for slicing the delta chain */
+       long readdata = 0;  /* amount of data currently planned to be read */
+       double density = 0; /* ration of payload data compared to read ones */
+       struct Gap *gaps = NULL; /* array of notable gap in the chain */
+       long num_gaps = 0; /* total number of notable gap recorded so far */
+       Py_ssize_t *selected_indices = NULL; /* indices of gap skipped over */
+       long num_selected = 0;               /* number of gaps skipped */
+       PyObject *chunk = NULL;              /* individual slice */
+       PyObject *allchunks = PyList_New(num_selected); /* all slices */
+
+       /* parsing argument */
+       if (!PyArg_ParseTuple(args, "O!dl", &PyList_Type, &list_revs,
+                             &targetdensity, &mingapsize)) {
+               goto bail;
+       }
+
+       /* If the delta chain contains a single element, we do not need slicing
+        */
+       num_revs = PyList_GET_SIZE(list_revs);
+       if (num_revs <= 1) {
+               result = PyTuple_Pack(1, list_revs);
+               goto done;
+       }
+
+       /* Turn the python list into a native integer array (for efficiency) */
+       revs = (Py_ssize_t *)malloc((num_revs) * sizeof(Py_ssize_t));
+       if (revs == NULL) {
+               PyErr_NoMemory();
+               goto bail;
+       }
+       for (i = 0; i < num_revs; i++) {
+               Py_ssize_t revnum = PyInt_AsLong(PyList_GET_ITEM(list_revs, i));
+               if (revnum == -1 && PyErr_Occurred()) {
+                       goto bail;
+               }
+               revs[i] = revnum;
+       }
+
+       /* Compute and check various property of the unsliced delta chain */
+       deltachainspan = index_segment_span(self, revs[0], revs[num_revs - 1]);
+
+       if (deltachainspan <= mingapsize) {
+               result = PyTuple_Pack(1, list_revs);
+               goto done;
+       }
+       chainpayload = 0;
+       for (i = 0; i < num_revs; i++) {
+               chainpayload += index_get_length(self, revs[i]);
+       }
+
+       readdata = deltachainspan;
+       density = 1.0;
+
+       if (0 < deltachainspan) {
+               density = (double)chainpayload / (double)deltachainspan;
+       };
+
+       if (density >= targetdensity) {
+               result = PyTuple_Pack(1, list_revs);
+               goto done;
+       }
+
+       /* if chain is too sparse, look for relevant gaps */
+       gaps = (struct Gap *)malloc((num_revs) * sizeof(struct Gap));
+       if (gaps == NULL) {
+               PyErr_NoMemory();
+               goto bail;
+       }
+
+       Py_ssize_t previous_end = -1;
+       for (i = 0; i < num_revs; i++) {
+               Py_ssize_t revstart = index_get_start(self, revs[i]);
+               Py_ssize_t revsize = index_get_length(self, revs[i]);
+               if (revsize <= 0) {
+                       continue;
+               }
+               if (previous_end >= 0) {
+                       Py_ssize_t gapsize = revstart - previous_end;
+                       if (gapsize > mingapsize) {
+                               gaps[num_gaps].size = gapsize;
+                               gaps[num_gaps].idx = i;
+                               num_gaps += 1;
+                       }
+               }
+               previous_end = revstart + revsize;
+       }
+       if (num_gaps == 0) {
+               result = PyTuple_Pack(1, list_revs);
+               goto done;
+       }
+       qsort(gaps, num_gaps, sizeof(struct Gap), &gap_compare);
+
+       /* Slice the largest gap first, they improve the density the most */
+       selected_indices =
+           (Py_ssize_t *)malloc((num_gaps + 1) * sizeof(Py_ssize_t));
+       if (selected_indices == NULL) {
+               PyErr_NoMemory();
+               goto bail;
+       }
+
+       for (i = num_gaps - 1; i >= 0; i--) {
+               selected_indices[num_selected] = gaps[i].idx;
+               readdata -= gaps[i].size;
+               num_selected += 1;
+               if (readdata <= 0) {
+                       density = 1.0;
+               } else {
+                       density = (double)chainpayload / (double)readdata;
+               }
+               if (density >= targetdensity) {
+                       break;
+               }
+       }
+       qsort(selected_indices, num_selected, sizeof(Py_ssize_t),
+             &long_compare);
+
+       /* create the resulting slice */
+       long previdx = 0;
+       selected_indices[num_selected] = num_revs;
+       for (i = 0; i <= num_selected; i++) {
+               long idx = selected_indices[i];
+               chunk = _trimchunk(self, list_revs, previdx, idx);
+               if (chunk == NULL) {
+                       goto bail;
+               }
+               if (PyList_GET_SIZE(chunk) > 0) {
+                       if (PyList_Append(allchunks, chunk) == -1) {
+                               goto bail;
+                       }
+               }
+               Py_DECREF(chunk);
+               chunk = NULL;
+               previdx = idx;
+       }
+       result = allchunks;
+       goto done;
+
+bail:
+       if (allchunks != NULL) {
+               Py_DECREF(allchunks);
+       }
+       if (chunk != NULL) {
+               Py_DECREF(chunk);
+       }
+done:
+       if (revs != NULL) {
+               free(revs);
+       }
+       if (gaps != NULL) {
+               free(gaps);
+       }
+       if (selected_indices != NULL) {
+               free(selected_indices);
+       }
+       if (result != NULL) {
+               Py_INCREF(result);
+       }
+       return result;
+}
+
 static inline int nt_level(const char *node, Py_ssize_t level)
 {
        int v = node[level >> 1];
@@ -2265,6 +2457,8 @@ static PyMethodDef index_methods[] = {
      "get filtered head revisions"}, /* Can always do filtering */
     {"deltachain", (PyCFunction)index_deltachain, METH_VARARGS,
      "determine revisions with deltas to reconstruct fulltext"},
+    {"slicechunktodensity", (PyCFunction)index_slicechunktodensity,
+     METH_VARARGS, "determine revisions with deltas to reconstruct fulltext"},
     {"append", (PyCFunction)index_append, METH_O, "append an index entry"},
     {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
      "match a potentially ambiguous node ID"},
_______________________________________________
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Reply via email to