Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-bitarray for openSUSE:Factory checked in at 2023-02-16 16:55:44 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-bitarray (Old) and /work/SRC/openSUSE:Factory/.python-bitarray.new.22824 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-bitarray" Thu Feb 16 16:55:44 2023 rev:16 rq:1065971 version:2.7.2 Changes: -------- --- /work/SRC/openSUSE:Factory/python-bitarray/python-bitarray.changes 2023-02-11 21:57:50.223814417 +0100 +++ /work/SRC/openSUSE:Factory/.python-bitarray.new.22824/python-bitarray.changes 2023-02-16 16:55:58.754731167 +0100 @@ -1,0 +2,10 @@ +Wed Feb 15 13:59:36 UTC 2023 - Dirk Müller <dmuel...@suse.com> + +- update to 2.7.2: + * speedup all count functionality by using + ``__builtin_popcountll`` when available + * add ``popcount64()`` to ``bitarray.h`` - we assume now that + ``uint64_t`` is always available + * improve testing + +------------------------------------------------------------------- Old: ---- bitarray-2.7.1.tar.gz New: ---- bitarray-2.7.2.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-bitarray.spec ++++++ --- /var/tmp/diff_new_pack.LO5IPt/_old 2023-02-16 16:55:59.306733373 +0100 +++ /var/tmp/diff_new_pack.LO5IPt/_new 2023-02-16 16:55:59.314733406 +0100 @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-bitarray -Version: 2.7.1 +Version: 2.7.2 Release: 0 Summary: Efficient Arrays of Booleans License: Python-2.0 ++++++ bitarray-2.7.1.tar.gz -> bitarray-2.7.2.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/bitarray-2.7.1/CHANGE_LOG new/bitarray-2.7.2/CHANGE_LOG --- old/bitarray-2.7.1/CHANGE_LOG 2023-02-10 16:30:12.000000000 +0100 +++ new/bitarray-2.7.2/CHANGE_LOG 2023-02-13 00:51:35.000000000 +0100 @@ -1,3 +1,12 @@ +2023-02-12 2.7.2: +------------------- + * speedup all count functionality by using `__builtin_popcountll` when + available, see #187 + * add `popcount64()` to `bitarray.h` - we assume now that `uint64_t` is + always available + * improve testing + + 2023-02-10 2.7.1: ------------------- * optimize `util.sc_encode()` diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/bitarray-2.7.1/README.rst new/bitarray-2.7.2/README.rst --- old/bitarray-2.7.1/README.rst 2023-02-10 16:30:12.000000000 +0100 +++ new/bitarray-2.7.2/README.rst 2023-02-13 00:51:35.000000000 +0100 @@ -63,20 +63,20 @@ $ python -c 'import bitarray; bitarray.test()' bitarray is installed in: /Users/ilan/bitarray/bitarray - bitarray version: 2.7.1 + bitarray version: 2.7.2 sys.version: 3.11.0 (main, Oct 25 2022) [Clang 14.0.4] sys.prefix: /Users/ilan/Mini3/envs/py311 pointer size: 64 bit sizeof(size_t): 8 sizeof(bitarrayobject): 80 - PY_UINT64_T defined: 1 - USE_WORD_SHIFT: 1 + __clang__ or __GNUC__ defined: 1 + PY_LITTLE_ENDIAN (use word shift): 1 DEBUG: 0 ......................................................................... ......................................................................... ................................................................ ---------------------------------------------------------------------- - Ran 464 tests in 0.472s + Ran 466 tests in 0.460s OK @@ -401,7 +401,7 @@ Reference ========= -bitarray version: 2.7.1 -- `change log <https://github.com/ilanschnell/bitarray/blob/master/doc/changelog.rst>`__ +bitarray version: 2.7.2 -- `change log <https://github.com/ilanschnell/bitarray/blob/master/doc/changelog.rst>`__ In the following, ``item`` and ``value`` are usually a single bit - an integer 0 or 1. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/bitarray-2.7.1/bitarray/_bitarray.c new/bitarray-2.7.2/bitarray/_bitarray.c --- old/bitarray-2.7.1/bitarray/_bitarray.c 2023-02-10 16:30:12.000000000 +0100 +++ new/bitarray-2.7.2/bitarray/_bitarray.c 2023-02-13 00:51:35.000000000 +0100 @@ -193,24 +193,6 @@ } } -#ifdef PY_UINT64_T -#define UINT64_BUFFER(self) ((PY_UINT64_T *) (self)->ob_item) -#define UINT64_WORDS(bytes) ((bytes) >> 3) -/* use 64-bit word shift only when the machine has little endian byteorder */ -#ifdef PY_LITTLE_ENDIAN -#define USE_WORD_SHIFT PY_LITTLE_ENDIAN -#else -#define USE_WORD_SHIFT (*((PY_UINT64_T *) "\xff\0\0\0\0\0\0\0") == 0xff) -#endif -#else /* !PY_UINT64_T */ -/* The UINT64_BUFFER macro only exists here in order to write code which - complies with and without PY_UINT64_T defined (in order to avoid - #ifdef'ing the code below). */ -#define UINT64_BUFFER(self) ((self)->ob_item) -#define UINT64_WORDS(bytes) 0 -#define USE_WORD_SHIFT 0 -#endif - /* Shift bits in byte-range(a, b) by n bits to right (using uint64 shifts when possible). The parameter (bebr = big endian byte reverse) is used to allow this @@ -235,7 +217,7 @@ if (bebr && IS_BE(self)) bytereverse(self, a, b); - if (USE_WORD_SHIFT && b >= a + 8) { + if (PY_LITTLE_ENDIAN && b >= a + 8) { const Py_ssize_t wa = (a + 7) / 8; /* word range(wa, wb) */ const Py_ssize_t wb = b / 8; const Py_ssize_t va = 8 * wa, vb = 8 * wb; @@ -251,7 +233,7 @@ for (i = wb - 1; i >= wa; i--) { assert_byte_in_range(self, 8 * i + 7); /* shift word - assumes machine has little endian byteorder */ - UINT64_BUFFER(self)[i] <<= n; + ((uint64_t *) ucbuff)[i] <<= n; if (i != wa) /* add shifted byte from next lower word */ ucbuff[8 * i] |= ucbuff[8 * i - 1] >> m; } @@ -381,15 +363,17 @@ invert(bitarrayobject *self) { const Py_ssize_t nbytes = Py_SIZE(self); - const Py_ssize_t nwords = UINT64_WORDS(nbytes); + const Py_ssize_t cwords = nbytes / 8; /* complete 64-bit words */ + char *buff = self->ob_item; + uint64_t *wbuff = (uint64_t *) buff; Py_ssize_t i; assert_nbits(self); assert(self->readonly == 0); - for (i = 0; i < nwords; i++) - UINT64_BUFFER(self)[i] = ~UINT64_BUFFER(self)[i]; - for (i = 8 * nwords; i < nbytes; i++) - self->ob_item[i] = ~self->ob_item[i]; + for (i = 0; i < cwords; i++) + wbuff[i] = ~wbuff[i]; + for (i = 8 * cwords; i < nbytes; i++) + buff[i] = ~buff[i]; } /* repeat self m times (negative m is treated as 0) */ @@ -457,17 +441,30 @@ static Py_ssize_t count(bitarrayobject *self, Py_ssize_t a, Py_ssize_t b) { + const Py_ssize_t n = b - a; Py_ssize_t res = 0, i; assert(0 <= a && a <= self->nbits); assert(0 <= b && b <= self->nbits); - if (a >= b) + if (n <= 0) return 0; - if (b >= a + 8) { + if (n >= 64) { + const Py_ssize_t word_a = (a + 63) / 64; + const Py_ssize_t word_b = b / 64; + const uint64_t *wbuff = (uint64_t *) self->ob_item; + + assert(a + 64 > 64 * word_a && 64 * word_b + 64 > b); + + res += count(self, a, 64 * word_a); + for (i = word_a; i < word_b; i++) + res += popcount64(wbuff[i]); + res += count(self, 64 * word_b, b); + } + else if (n >= 8) { const Py_ssize_t byte_a = BYTES(a); const Py_ssize_t byte_b = b / 8; - unsigned char *ucbuff = (unsigned char *) self->ob_item; + const unsigned char *ucbuff = (unsigned char *) self->ob_item; assert(a + 8 > 8 * byte_a && 8 * byte_b + 8 > b); @@ -496,14 +493,14 @@ if (n <= 0) return -1; -#ifdef PY_UINT64_T /* When the search range is greater than 64 bits, we skip uint64 words. Note that we cannot check for n >= 64 here as the function could then go into an infinite recursive loop when a word is found. */ if (n > 64) { - Py_ssize_t word_a = (a + 63) / 64; - Py_ssize_t word_b = b / 64; - PY_UINT64_T *wbuff = (PY_UINT64_T *) self->ob_item, w = vi ? 0 : ~0; + const Py_ssize_t word_a = (a + 63) / 64; + const Py_ssize_t word_b = b / 64; + const uint64_t *wbuff = (uint64_t *) self->ob_item; + const uint64_t w = vi ? 0 : ~0; if ((res = find_bit(self, vi, a, 64 * word_a)) >= 0) return res; @@ -515,14 +512,14 @@ } return find_bit(self, vi, 64 * word_b, b); } -#endif + /* For the same reason as above, we cannot check for n >= 8 here. */ if (n > 8) { - Py_ssize_t byte_a = BYTES(a); - Py_ssize_t byte_b = b / 8; - char *buff = self->ob_item, c = vi ? 0 : ~0; + const Py_ssize_t byte_a = BYTES(a); + const Py_ssize_t byte_b = b / 8; + const char *buff = self->ob_item; + const char c = vi ? 0 : ~0; - assert(UINT64_WORDS(8) == 0 || n <= 64); if ((res = find_bit(self, vi, a, 8 * byte_a)) >= 0) return res; @@ -533,7 +530,7 @@ } return find_bit(self, vi, 8 * byte_b, b); } - assert(n <= 8); + for (i = a; i < b; i++) { if (getbit(self, i) == vi) return i; @@ -754,7 +751,7 @@ return extend_bitarray(self, (bitarrayobject *) obj); if (PyBytes_Check(obj)) { /* bytes 01 */ -#ifdef IS_PY3K +#if IS_PY3K PyErr_SetString(PyExc_TypeError, "cannot extend bitarray with 'bytes', " "use .pack() or .frombytes() instead"); @@ -1089,7 +1086,7 @@ assert(PyLong_Check(result)); if (PyLong_AsSsize_t(result) < 0) { Py_DECREF(result); -#ifdef IS_PY3K +#if IS_PY3K PyErr_Format(PyExc_ValueError, "%A not in bitarray", PyTuple_GET_ITEM(args, 0)); #else @@ -1415,7 +1412,7 @@ static PyObject * bitarray_tolist(bitarrayobject *self) { - PyObject *list, *item; + PyObject *list; Py_ssize_t i; list = PyList_New(self->nbits); @@ -1423,7 +1420,7 @@ return NULL; for (i = 0; i < self->nbits; i++) { - item = PyLong_FromLong(getbit(self, i)); + PyObject *item = PyLong_FromLong(getbit(self, i)); if (item == NULL) { Py_DECREF(list); return NULL; @@ -2177,32 +2174,36 @@ bitwise(bitarrayobject *self, bitarrayobject *other, const char oper) { const Py_ssize_t nbytes = Py_SIZE(self); - const Py_ssize_t nwords = UINT64_WORDS(nbytes); + const Py_ssize_t cwords = nbytes / 8; /* complete 64-bit words */ Py_ssize_t i; + char *buff_s = self->ob_item; + char *buff_o = other->ob_item; + uint64_t *wbuff_s = (uint64_t *) buff_s; + uint64_t *wbuff_o = (uint64_t *) buff_o; assert(self->nbits == other->nbits); assert(self->endian == other->endian); assert_nbits(self); switch (oper) { case '&': - for (i = 0; i < nwords; i++) - UINT64_BUFFER(self)[i] &= UINT64_BUFFER(other)[i]; - for (i = 8 * nwords; i < nbytes; i++) - self->ob_item[i] &= other->ob_item[i]; + for (i = 0; i < cwords; i++) + wbuff_s[i] &= wbuff_o[i]; + for (i = 8 * cwords; i < nbytes; i++) + buff_s[i] &= buff_o[i]; break; case '|': - for (i = 0; i < nwords; i++) - UINT64_BUFFER(self)[i] |= UINT64_BUFFER(other)[i]; - for (i = 8 * nwords; i < nbytes; i++) - self->ob_item[i] |= other->ob_item[i]; + for (i = 0; i < cwords; i++) + wbuff_s[i] |= wbuff_o[i]; + for (i = 8 * cwords; i < nbytes; i++) + buff_s[i] |= buff_o[i]; break; case '^': - for (i = 0; i < nwords; i++) - UINT64_BUFFER(self)[i] ^= UINT64_BUFFER(other)[i]; - for (i = 8 * nwords; i < nbytes; i++) - self->ob_item[i] ^= other->ob_item[i]; + for (i = 0; i < cwords; i++) + wbuff_s[i] ^= wbuff_o[i]; + for (i = 8 * cwords; i < nbytes; i++) + buff_s[i] ^= buff_o[i]; break; default: @@ -2451,7 +2452,7 @@ value = PyDict_GetItem(codedict, symbol); Py_DECREF(symbol); if (value == NULL) { -#ifdef IS_PY3K +#if IS_PY3K PyErr_Format(PyExc_ValueError, "symbol not defined in prefix code: %A", symbol); #else @@ -2552,7 +2553,7 @@ return 0; ambiguity: -#ifdef IS_PY3K +#if IS_PY3K PyErr_Format(PyExc_ValueError, "prefix code ambiguous: %A", symbol); #else PyErr_SetString(PyExc_ValueError, "prefix code ambiguous"); @@ -2818,7 +2819,7 @@ create a binary tree object to be passed to `.decode()` or `.iterdecode()`."); static PyTypeObject DecodeTree_Type = { -#ifdef IS_PY3K +#if IS_PY3K PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) @@ -3007,7 +3008,7 @@ } static PyTypeObject DecodeIter_Type = { -#ifdef IS_PY3K +#if IS_PY3K PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) @@ -3112,7 +3113,7 @@ } static PyTypeObject SearchIter_Type = { -#ifdef IS_PY3K +#if IS_PY3K PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) @@ -3556,7 +3557,7 @@ } static PyTypeObject BitarrayIter_Type = { -#ifdef IS_PY3K +#if IS_PY3K PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) @@ -3711,7 +3712,7 @@ static PyTypeObject Bitarray_Type = { -#ifdef IS_PY3K +#if IS_PY3K PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) @@ -3813,7 +3814,7 @@ (int) sizeof(bitarrayobject), (int) sizeof(decodetreeobject), (int) sizeof(binode), -#ifdef PY_UINT64_T +#if (defined(__clang__) || defined(__GNUC__)) 1, #else 0, @@ -3823,7 +3824,7 @@ #else 0, #endif - (int) USE_WORD_SHIFT + (int) PY_LITTLE_ENDIAN ); } @@ -3837,9 +3838,9 @@ 2. sizeof(bitarrayobject)\n\ 3. sizeof(decodetreeobject)\n\ 4. sizeof(binode)\n\ -5. PY_UINT64_T defined\n\ +5. __clang__ or __GNUC__ defined\n\ 6. NDEBUG not defined\n\ -7. USE_WORD_SHIFT"); +7. PY_LITTLE_ENDIAN"); static PyMethodDef module_functions[] = { @@ -3854,14 +3855,14 @@ /******************************* Install Module ***************************/ -#ifdef IS_PY3K +#if IS_PY3K static PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_bitarray", 0, -1, module_functions, }; #endif PyMODINIT_FUNC -#ifdef IS_PY3K +#if IS_PY3K PyInit__bitarray(void) #else init_bitarray(void) @@ -3871,7 +3872,7 @@ setup_reverse_trans(); -#ifdef IS_PY3K +#if IS_PY3K m = PyModule_Create(&moduledef); #else m = Py_InitModule3("_bitarray", module_functions, 0); @@ -3905,7 +3906,7 @@ PyModule_AddObject(m, "__version__", Py_BuildValue("s", BITARRAY_VERSION)); -#ifdef IS_PY3K +#if IS_PY3K return m; error: return NULL; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/bitarray-2.7.1/bitarray/_util.c new/bitarray-2.7.2/bitarray/_util.c --- old/bitarray-2.7.1/bitarray/_util.c 2023-02-10 16:30:12.000000000 +0100 +++ new/bitarray-2.7.2/bitarray/_util.c 2023-02-13 00:51:35.000000000 +0100 @@ -91,10 +91,10 @@ /* Return the smallest index i for which a.count(vi, 0, i) == n. When n exceeds the total count, return -1. */ static Py_ssize_t -count_to_n(bitarrayobject *a, Py_ssize_t n, int vi) +count_n_core(bitarrayobject *a, Py_ssize_t n, int vi) { const Py_ssize_t nbits = a->nbits; - unsigned char *ucbuff = (unsigned char *) a->ob_item; + uint64_t *wbuff = (uint64_t *) a->ob_item; Py_ssize_t i = 0; /* index */ Py_ssize_t j = 0; /* total count up to index */ Py_ssize_t block_start, block_stop, k, m; @@ -108,11 +108,11 @@ while (i + BLOCK_BITS < nbits) { m = 0; assert(i % 8 == 0); - block_start = i >> 3; - block_stop = block_start + (BLOCK_BITS >> 3); - assert(block_stop <= Py_SIZE(a)); + block_start = i / 64; + block_stop = block_start + (BLOCK_BITS / 64); + assert(8 * block_stop <= Py_SIZE(a)); for (k = block_start; k < block_stop; k++) - m += bitcount_lookup[ucbuff[k]]; + m += popcount64(wbuff[k]); if (!vi) m = BLOCK_BITS - m; if (j + m >= n) @@ -122,16 +122,16 @@ } #undef BLOCK_BITS - while (i + 8 < nbits) { - k = i >> 3; - assert(k < Py_SIZE(a)); - m = bitcount_lookup[ucbuff[k]]; + while (i + 64 < nbits) { + k = i / 64; + assert(8 * k + 8 <= Py_SIZE(a)); + m = popcount64(wbuff[k]); if (!vi) - m = 8 - m; + m = 64 - m; if (j + m >= n) break; j += m; - i += 8; + i += 64; } while (j < n && i < nbits) { @@ -162,7 +162,7 @@ PyErr_SetString(PyExc_ValueError, "n larger than bitarray size"); return NULL; } - i = count_to_n(a, n, vi); /* do actual work here */ + i = count_n_core(a, n, vi); /* do actual work here */ if (i < 0) { PyErr_SetString(PyExc_ValueError, "n exceeds total count"); @@ -193,11 +193,11 @@ return -1; /* the logic here is the same as in find_bit() in _bitarray.c */ -#ifdef PY_UINT64_T if (n > 64) { - Py_ssize_t word_a = (a + 63) / 64; - Py_ssize_t word_b = b / 64; - PY_UINT64_T *wbuff = (PY_UINT64_T *) self->ob_item, w = vi ? 0 : ~0; + const Py_ssize_t word_a = (a + 63) / 64; + const Py_ssize_t word_b = b / 64; + const uint64_t *wbuff = (uint64_t *) self->ob_item; + const uint64_t w = vi ? 0 : ~0; if ((res = find_last(self, vi, 64 * word_b, b)) >= 0) return res; @@ -208,11 +208,12 @@ } return find_last(self, vi, a, 64 * word_a); } -#endif + if (n > 8) { - Py_ssize_t byte_a = BYTES(a); - Py_ssize_t byte_b = b / 8; - char *buff = self->ob_item, c = vi ? 0 : ~0; + const Py_ssize_t byte_a = BYTES(a); + const Py_ssize_t byte_b = b / 8; + const char *buff = self->ob_item; + const char c = vi ? 0 : ~0; if ((res = find_last(self, vi, 8 * byte_b, b)) >= 0) return res; @@ -224,7 +225,7 @@ } return find_last(self, vi, a, 8 * byte_a); } - assert(n <= 8); + for (i = b - 1; i >= a; i--) { if (getbit(self, i) == vi) return i; @@ -304,10 +305,11 @@ static PyObject * binary_function(PyObject *args, const char *format, const char oper) { - Py_ssize_t cnt = 0, s, i; + Py_ssize_t cnt = 0, cwords, cbytes, i; bitarrayobject *a, *b; unsigned char *buff_a, *buff_b; - int r; + uint64_t *wbuff_a, *wbuff_b; + int rbits; if (!PyArg_ParseTuple(args, format, bitarray_type_obj, (PyObject *) &a, @@ -318,45 +320,62 @@ buff_a = (unsigned char *) a->ob_item; buff_b = (unsigned char *) b->ob_item; - s = a->nbits / 8; /* number of whole bytes in buffer */ - r = a->nbits % 8; /* remaining bits */ + wbuff_a = (uint64_t *) buff_a; + wbuff_b = (uint64_t *) buff_b; + cwords = a->nbits / 64; /* number of complete 64-bit words */ + cbytes = a->nbits / 8; /* number of complete bytes in buffer */ + rbits = a->nbits % 8; /* remaining bits */ switch (oper) { #define UZ(x) ((unsigned char) zeroed_last_byte(x)) case '&': /* count and */ - for (i = 0; i < s; i++) + for (i = 0; i < cwords; i++) + cnt += popcount64(wbuff_a[i] & wbuff_b[i]); + for (i = 8 * cwords; i < cbytes; i++) cnt += bitcount_lookup[buff_a[i] & buff_b[i]]; - if (r) + if (rbits) cnt += bitcount_lookup[UZ(a) & UZ(b)]; break; case '|': /* count or */ - for (i = 0; i < s; i++) + for (i = 0; i < cwords; i++) + cnt += popcount64(wbuff_a[i] | wbuff_b[i]); + for (i = 8 * cwords; i < cbytes; i++) cnt += bitcount_lookup[buff_a[i] | buff_b[i]]; - if (r) + if (rbits) cnt += bitcount_lookup[UZ(a) | UZ(b)]; break; case '^': /* count xor */ - for (i = 0; i < s; i++) + for (i = 0; i < cwords; i++) + cnt += popcount64(wbuff_a[i] ^ wbuff_b[i]); + for (i = 8 * cwords; i < cbytes; i++) cnt += bitcount_lookup[buff_a[i] ^ buff_b[i]]; - if (r) + if (rbits) cnt += bitcount_lookup[UZ(a) ^ UZ(b)]; break; case 'a': /* any and */ - for (i = 0; i < s; i++) { + for (i = 0; i < cwords; i++) { + if (wbuff_a[i] & wbuff_b[i]) + Py_RETURN_TRUE; + } + for (i = 8 * cwords; i < cbytes; i++) { if (buff_a[i] & buff_b[i]) Py_RETURN_TRUE; } - return PyBool_FromLong(r && (UZ(a) & UZ(b))); + return PyBool_FromLong(rbits && (UZ(a) & UZ(b))); case 's': /* is subset */ - for (i = 0; i < s; i++) { + for (i = 0; i < cwords; i++) { + if ((wbuff_a[i] & wbuff_b[i]) != wbuff_a[i]) + Py_RETURN_FALSE; + } + for (i = 8 * cwords; i < cbytes; i++) { if ((buff_a[i] & buff_b[i]) != buff_a[i]) Py_RETURN_FALSE; } - return PyBool_FromLong(r == 0 || (UZ(a) & UZ(b)) == UZ(a)); + return PyBool_FromLong(rbits == 0 || (UZ(a) & UZ(b)) == UZ(a)); default: Py_UNREACHABLE(); @@ -412,9 +431,10 @@ static PyObject * correspond_all(PyObject *module, PyObject *args) { - Py_ssize_t nff = 0, nft = 0, ntf = 0, ntt = 0, i, s; + Py_ssize_t nff = 0, nft = 0, ntf = 0, ntt = 0, cwords, cbytes, i; bitarrayobject *a, *b; unsigned char u, v, not_u, not_v; + uint64_t u64, v64, not_u64, not_v64; if (!PyArg_ParseTuple(args, "O!O!:_correspond_all", bitarray_type_obj, (PyObject *) &a, @@ -422,9 +442,20 @@ return NULL; if (same_size_endian(a, b) < 0) return NULL; - s = a->nbits / 8; /* number of whole bytes in buffer */ + cwords = a->nbits / 64; /* number of complete 64-bit words */ + cbytes = a->nbits / 8; /* number of complete bytes in buffer */ - for (i = 0; i < s; i++) { + for (i = 0; i < cwords; i++) { + u64 = ((uint64_t *) a->ob_item)[i]; + v64 = ((uint64_t *) b->ob_item)[i]; + not_u64 = ~u64; + not_v64 = ~v64; + nff += popcount64(not_u64 & not_v64); + nft += popcount64(not_u64 & v64); + ntf += popcount64(u64 & not_v64); + ntt += popcount64(u64 & v64); + } + for (i = 8 * cwords; i < cbytes; i++) { u = a->ob_item[i]; v = b->ob_item[i]; not_u = ~u; @@ -436,8 +467,8 @@ } if (a->nbits % 8) { unsigned char mask = ones_table[IS_BE(a)][a->nbits % 8]; - u = a->ob_item[s]; - v = b->ob_item[s]; + u = a->ob_item[cbytes]; + v = b->ob_item[cbytes]; not_u = ~u; not_v = ~v; nff += bitcount_lookup[not_u & not_v & mask]; @@ -962,7 +993,7 @@ return -1; } -#ifdef IS_PY3K +#if IS_PY3K if (PyLong_Check(item)) c = (unsigned char) PyLong_AsLong(item); #else @@ -1030,14 +1061,22 @@ return n; } -/* starting from byte i count the remaining population in bitarray buffer */ +/* starting from word `i` count the remaining population in bitarray buffer */ static Py_ssize_t -count_from(bitarrayobject *a, Py_ssize_t i) +count_from_word(bitarrayobject *a, Py_ssize_t i) { + const Py_ssize_t cwords = a->nbits / 64; + const Py_ssize_t cbytes = a->nbits / 8; Py_ssize_t cnt = 0; - while (i < a->nbits / 8) - cnt += bitcount_lookup[(unsigned char) a->ob_item[i++]]; + if (64 * i >= a->nbits) + return 0; + + while (i < cwords) + cnt += popcount64(((uint64_t *) a->ob_item)[i++]); + + for (i = 8 * cwords; i < cbytes; i++) + cnt += bitcount_lookup[(unsigned char) a->ob_item[i]]; if (a->nbits % 8) cnt += bitcount_lookup[(unsigned char) zeroed_last_byte(a)]; @@ -1056,10 +1095,12 @@ #define BSI(n) (((Py_ssize_t) 1) << (8 * (n) - 3)) /* segment size in bytes - Although of little practical value, the code - below will also work when changing SEGSIZE to 1, 2, 4, 8 or 16, as long - as a multiple of SEGSIZE is 32. The size 32 is rooted in the fact that - a bitarray of 32 bytes (256 bits) can be indexed with one index byte - (BSI(1) = 32). Our entire 'sc' format is constructed around this. */ + below will also work for SEGSIZE values of: 8, 16 and 32 + BSI(1) = 32 must be divisible by SEGSIZE. + SEGSIZE must also be a multiple of the word size (sizeof(uint64_t) = 8). + The size 32 is rooted in the fact that a bitarray of 32 bytes (256 bits) + can be indexed with one index byte (BSI(1) = 32). Our entire 'sc' format + is constructed around this. */ #define SEGSIZE 32 /* number of 256 bit segments given nbits */ @@ -1127,8 +1168,8 @@ res[j] = cnt; assert(buff - a->ob_item + SEGSIZE <= Py_SIZE(a)); if (memcmp(buff, zeros, SEGSIZE)) { /* segment has not only zeros */ - for (i = 0; i < SEGSIZE; i++) - cnt += bitcount_lookup[(unsigned char) buff[i]]; + for (i = 0; i < SEGSIZE / 8; i++) + cnt += popcount64(((uint64_t *) buff)[i]); } } assert(buff - a->ob_item == SEGSIZE * c_seg); @@ -1138,12 +1179,44 @@ assert(Py_SIZE(a) <= SEGSIZE * n_seg); assert(a->nbits && a->nbits < 8 * SEGSIZE * n_seg); - cnt += count_from(a, SEGSIZE * c_seg); + cnt += count_from_word(a, (SEGSIZE / 8) * c_seg); res[n_seg] = cnt; } return res; } +/* expose sc_calc_rts() to Python during debug mode for testing */ +#ifndef NDEBUG +static PyObject * +sc_rts(PyObject *module, PyObject *obj) +{ + PyObject *list; + bitarrayobject *a; + Py_ssize_t *rts, i; + + if (ensure_bitarray(obj) < 0) + return NULL; + + a = (bitarrayobject *) obj; + if ((rts = sc_calc_rts(a)) == NULL) + return NULL; + + if ((list = PyList_New(NSEG(a->nbits) + 1)) == NULL) + return NULL; + + for (i = 0; i <= NSEG(a->nbits); i++) { + PyObject *item = PyLong_FromSsize_t(rts[i]); + if (item == NULL) { + Py_DECREF(list); + return NULL; + } + PyList_SET_ITEM(list, i, item); + } + PyMem_Free(rts); + return list; +} +#endif /* NDEBUG */ + /* Equivalent to the Python expression: a.count(1, 8 * offset, 8 * offset + (1 << (8 * n))) @@ -1263,9 +1336,6 @@ Py_UNREACHABLE(); } -#undef SEGSIZE -#undef NSEG - /* Write one sparse block (from `offset`, and up to `k` one bits). Return number of bytes written to buffer `str` (encoded block size). */ static Py_ssize_t @@ -1904,7 +1974,7 @@ } static PyTypeObject CHDI_Type = { -#ifdef IS_PY3K +#if IS_PY3K PyVarObject_HEAD_INIT(NULL, 0) #else PyObject_HEAD_INIT(NULL) @@ -1973,19 +2043,23 @@ METH_VARARGS, vl_decode_doc}, {"canonical_decode", (PyCFunction) chdi_new, METH_VARARGS, chdi_doc}, +#ifndef NDEBUG + /* functionality exposed in debug mode for testing */ + {"_sc_rts", (PyCFunction) sc_rts, METH_O, 0}, +#endif {NULL, NULL} /* sentinel */ }; /******************************* Install Module ***************************/ -#ifdef IS_PY3K +#if IS_PY3K static PyModuleDef moduledef = { PyModuleDef_HEAD_INIT, "_util", 0, -1, module_functions, }; #endif PyMODINIT_FUNC -#ifdef IS_PY3K +#if IS_PY3K PyInit__util(void) #else init_util(void) @@ -2000,7 +2074,7 @@ if (bitarray_type_obj == NULL) goto error; -#ifdef IS_PY3K +#if IS_PY3K m = PyModule_Create(&moduledef); #else m = Py_InitModule3("_util", module_functions, 0); @@ -2012,7 +2086,11 @@ goto error; Py_SET_TYPE(&CHDI_Type, &PyType_Type); -#ifdef IS_PY3K +#ifndef NDEBUG + PyModule_AddObject(m, "_SEGSIZE", PyLong_FromSsize_t(SEGSIZE)); +#endif + +#if IS_PY3K return m; error: return NULL; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/bitarray-2.7.1/bitarray/bitarray.h new/bitarray-2.7.2/bitarray/bitarray.h --- old/bitarray-2.7.1/bitarray/bitarray.h 2023-02-10 16:30:12.000000000 +0100 +++ new/bitarray-2.7.2/bitarray/bitarray.h 2023-02-13 00:51:35.000000000 +0100 @@ -4,7 +4,7 @@ Author: Ilan Schnell */ -#define BITARRAY_VERSION "2.7.1" +#define BITARRAY_VERSION "2.7.2" #ifdef STDC_HEADERS #include <stddef.h> @@ -29,10 +29,17 @@ #define Py_UNREACHABLE() abort() #endif +/* PY_LITTLE_ENDIAN and PY_BIG_ENDIAN (machine byteorder) are available in + Python 3.5.10. Not sure when they were introduced */ +#ifndef PY_LITTLE_ENDIAN +#define PY_LITTLE_ENDIAN (*((uint64_t *) "\xff\0\0\0\0\0\0\0") == 0xff) +#endif + #if PY_MAJOR_VERSION >= 3 -#define IS_PY3K +#define IS_PY3K 1 #define BYTES_SIZE_FMT "y#" #else +#define IS_PY3K 0 /* the Py_MIN and Py_MAX macros were introduced in Python 3.3 */ #define Py_MIN(x, y) (((x) > (y)) ? (y) : (x)) #define Py_MAX(x, y) (((x) > (y)) ? (x) : (y)) @@ -159,6 +166,26 @@ #undef B6 }; +/* Population count: count the number of 1's in 'x'. */ +static inline int +popcount64(uint64_t x) +{ +#if (defined(__clang__) || defined(__GNUC__)) + return __builtin_popcountll(x); +#else + /* https://en.wikipedia.org/wiki/Hamming_weight popcount64c */ + const uint64_t m1 = 0x5555555555555555; + const uint64_t m2 = 0x3333333333333333; + const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; + const uint64_t h01 = 0x0101010101010101; + + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2); + x = (x + (x >> 4)) & m4; + return (x * h01) >> 56; +#endif /* __GNUC__ */ +} + /* adjust index a manner consistent with the handling of normal slices */ static inline void adjust_index(Py_ssize_t length, Py_ssize_t *i, Py_ssize_t step) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/bitarray-2.7.1/bitarray/test_bitarray.py new/bitarray-2.7.2/bitarray/test_bitarray.py --- old/bitarray-2.7.1/bitarray/test_bitarray.py 2023-02-10 16:30:12.000000000 +0100 +++ new/bitarray-2.7.2/bitarray/test_bitarray.py 2023-02-13 00:51:35.000000000 +0100 @@ -2841,12 +2841,29 @@ self.assertEqual(a.count(v), ref) self.assertEqual(a.count(v, n, -n - 1, -1), ref) + def test_sparse(self): + N = 65536 + a = zeros(N) + indices = set(randint(0, N - 1) for _ in range(256)) + for i in indices: + a[i] = 1 + self.assertEqual(a.count(1), len(indices)) + self.assertEqual(a.count(0), N - len(indices)) + + for _ in range(100): + i = randint(0, N - 1) + j = randint(i, N - 1) + cnt = sum(1 for k in indices if i <= k < j) + self.assertEqual(a.count(1, i, j), cnt) + self.assertEqual(a.count(0, i, j), j - i - cnt) + def test_zeros(self): - N = 37 + N = 300 a = zeros(N) - for i in range(N): - for j in range(i, N): - self.assertEqual(a.count(0, i, j), j - i) + for _ in range(10): + i = randint(0, N - 1) + j = randint(i, N - 1) + self.assertEqual(a.count(0, i, j), j - i) for step in range(-N - 3, N + 3): if step == 0: @@ -4629,8 +4646,8 @@ print('pointer size: %d bit' % (8 * SYSINFO[0])) print('sizeof(size_t): %d' % SYSINFO[1]) print('sizeof(bitarrayobject): %d' % SYSINFO[2]) - print('PY_UINT64_T defined: %s' % SYSINFO[5]) - print('USE_WORD_SHIFT: %s' % SYSINFO[7]) + print('__clang__ or __GNUC__ defined: %s' % SYSINFO[5]) + print('PY_LITTLE_ENDIAN (use word shift): %s' % SYSINFO[7]) print('DEBUG: %s' % DEBUG) suite = unittest.TestSuite() for cls in tests: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/bitarray-2.7.1/bitarray/test_util.py new/bitarray-2.7.2/bitarray/test_util.py --- old/bitarray-2.7.1/bitarray/test_util.py 2023-02-10 16:30:12.000000000 +0100 +++ new/bitarray-2.7.2/bitarray/test_util.py 2023-02-13 00:51:35.000000000 +0100 @@ -18,7 +18,7 @@ from bitarray import (bitarray, frozenbitarray, decodetree, bits2bytes, get_default_endian, _set_default_endian) -from bitarray.test_bitarray import Util, skipIf +from bitarray.test_bitarray import Util, skipIf, DEBUG from bitarray.util import ( zeros, urandom, pprint, make_endian, rindex, strip, count_n, @@ -30,6 +30,11 @@ huffman_code, canonical_huffman, canonical_decode, ) +if DEBUG: + from bitarray._util import _sc_rts, _SEGSIZE # type: ignore +else: + _SEGSIZE = -1 + if sys.version_info[0] == 3: from io import StringIO else: @@ -516,6 +521,14 @@ self.assertEqual(count_n(a, 1), i + 1) self.assertRaises(ValueError, count_n, a, 2) + def test_last(self): + for n in range(1, 1000): + a = zeros(n) + a[-1] = 1 + self.assertEqual(a.count(), 1) + self.assertEqual(count_n(a, 1), n) + self.assertRaises(ValueError, count_n, a, 2) + def test_large(self): for _ in range(100): N = randint(100000, 250000) @@ -1398,6 +1411,35 @@ a &= urandom(n, endian) self.round_trip(a) + @skipIf(not DEBUG) + def test_rts_empty(self): + a = bitarray() + self.assertEqual(_sc_rts(a), [0]) + + @skipIf(_SEGSIZE != 32) + def test_rts_example(self): + # see example before sc_calc_rts() in _util.c + a = zeros(987) + a[:5] = a[512:515] = a[768:772] = 1 + self.assertEqual(a.count(), 12) + rts = _sc_rts(a) + self.assertEqual(len(rts), 5) + self.assertEqual(rts, [0, 5, 5, 8, 12]) + + @skipIf(not DEBUG) + def test_rts_random(self): + segbits = 8 * _SEGSIZE + for n in range(2000): + a = urandom(n) + rts = _sc_rts(a) + self.assertEqual(len(rts), (n + segbits - 1) // segbits + 1) + self.assertEqual(rts[0], 0) + self.assertEqual(rts[-1], a.count()) + + for i in range(len(rts) - 1): + seg_pop = a.count(1, segbits * i, segbits * (i + 1)) + self.assertEqual(rts[i + 1] - rts[i], seg_pop) + tests.append(SC_Tests) # --------------------------------------------------------------------------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/bitarray-2.7.1/doc/changelog.rst new/bitarray-2.7.2/doc/changelog.rst --- old/bitarray-2.7.1/doc/changelog.rst 2023-02-10 16:30:12.000000000 +0100 +++ new/bitarray-2.7.2/doc/changelog.rst 2023-02-13 00:51:35.000000000 +0100 @@ -1,6 +1,15 @@ Change log ========== +**2.7.2** (2023-02-12): + +* speedup all count functionality by using ``__builtin_popcountll`` when + available, see `#187 <https://github.com/ilanschnell/bitarray/issues/187>`__ +* add ``popcount64()`` to ``bitarray.h`` - we assume now that ``uint64_t`` is + always available +* improve testing + + **2.7.1** (2023-02-10): * optimize ``util.sc_encode()`` diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/bitarray-2.7.1/doc/reference.rst new/bitarray-2.7.2/doc/reference.rst --- old/bitarray-2.7.1/doc/reference.rst 2023-02-10 16:30:12.000000000 +0100 +++ new/bitarray-2.7.2/doc/reference.rst 2023-02-13 00:51:35.000000000 +0100 @@ -1,7 +1,7 @@ Reference ========= -bitarray version: 2.7.1 -- `change log <https://github.com/ilanschnell/bitarray/blob/master/doc/changelog.rst>`__ +bitarray version: 2.7.2 -- `change log <https://github.com/ilanschnell/bitarray/blob/master/doc/changelog.rst>`__ In the following, ``item`` and ``value`` are usually a single bit - an integer 0 or 1. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/bitarray-2.7.1/doc/sparse_compression.rst new/bitarray-2.7.2/doc/sparse_compression.rst --- old/bitarray-2.7.1/doc/sparse_compression.rst 2023-02-10 16:30:12.000000000 +0100 +++ new/bitarray-2.7.2/doc/sparse_compression.rst 2023-02-13 00:51:35.000000000 +0100 @@ -47,10 +47,10 @@ compress (ms) decompress (ms) ratio ---------------------------------------------------------- - serialize 4.562 1.188 1.0000 - sc 6.069 2.680 0.0158 - gzip 920.343 16.161 0.0169 - bz2 59.580 33.435 0.0117 + serialize 3.876 1.002 1.0000 + sc 5.502 2.703 0.0158 + gzip 918.937 10.057 0.0169 + bz2 59.500 32.611 0.0117 Statistics