On Sun, 22 May 2016 13:04:40 -0400
"Anthony G. Basile" <bas...@opensource.dyc.edu> wrote:

> From: "Anthony G. Basile" <bluen...@gentoo.org>
> 
> The current method to check for a sane system locale is to use python's
> ctypes.util.find_library() to construct a full library path to the
> system libc.so and pass that path to ctypes.CDLL() so we can call
> toupper() and tolower() directly.  However, this gets bogged down in
> implementation details and fails with musl.
> 
> We work around this design flaw in ctypes with a small python module
> written in C which provides thin wrappers to toupper() and tolower(),
> and only fall back on the current ctypes-based check when this module
> is not available.
> 
> This has been tested on glibc, uClibc and musl systems.
> 
> X-Gentoo-bug: 571444
> X-Gentoo-bug-url: https://bugs.gentoo.org/show_bug.cgi?id=571444
> 
> Signed-off-by: Anthony G. Basile <bluen...@gentoo.org>
> ---
>  pym/portage/util/locale.py   | 32 ++++++++++-----
>  setup.py                     |  6 ++-
>  src/portage_c_convert_case.c | 94 
> ++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 121 insertions(+), 11 deletions(-)
>  create mode 100644 src/portage_c_convert_case.c
> 
> diff --git a/pym/portage/util/locale.py b/pym/portage/util/locale.py
> index 2a15ea1..85ddd2b 100644
> --- a/pym/portage/util/locale.py
> +++ b/pym/portage/util/locale.py
> @@ -11,6 +11,7 @@ from __future__ import absolute_import, unicode_literals
>  import locale
>  import logging
>  import os
> +import sys
>  import textwrap
>  import traceback
>  
> @@ -34,18 +35,26 @@ def _check_locale(silent):
>       """
>       The inner locale check function.
>       """
> -
> -     libc_fn = find_library("c")
> -     if libc_fn is None:
> -             return None
> -     libc = LoadLibrary(libc_fn)
> -     if libc is None:
> -             return None
> +     try:
> +             from portage_c_convert_case import _c_toupper, _c_tolower
> +             libc_tolower = _c_tolower
> +             libc_toupper = _c_toupper

Now I'm being picky... but if you named the functions toupper()
and tolower(), you could actually import the whole module as 'libc'
and have less code!

Also it would be nice to actually make the module more generic. There
are more places where we use CDLL, and all of them could eventually be
supported by the module (unshare() would be much better done in C, for
example).

> +     except ImportError:
> +             writemsg_level("!!! Unable to import 
> portage_c_convert_case\n!!!\n",
> +                     level=logging.WARNING, noiselevel=-1)

Do we really want to warn verbosely about this? I think it'd be
a pretty common case for people running the git checkout.

> +             libc_fn = find_library("c")
> +             if libc_fn is None:
> +                     return None
> +             libc = LoadLibrary(libc_fn)
> +             if libc is None:
> +                     return None
> +             libc_tolower = libc.tolower
> +             libc_toupper = libc.toupper
>  
>       lc = list(range(ord('a'), ord('z')+1))
>       uc = list(range(ord('A'), ord('Z')+1))
> -     rlc = [libc.tolower(c) for c in uc]
> -     ruc = [libc.toupper(c) for c in lc]
> +     rlc = [libc_tolower(c) for c in uc]
> +     ruc = [libc_toupper(c) for c in lc]
>  
>       if lc != rlc or uc != ruc:
>               if silent:
> @@ -62,7 +71,10 @@ def _check_locale(silent):
>                       "as LC_CTYPE in make.conf.")
>               msg = [l for l in textwrap.wrap(msg, 70)]
>               msg.append("")
> -             chars = lambda l: ''.join(chr(x) for x in l)
> +             if sys.version_info.major >= 3:

Portage uses hexversion for comparisons. Please be consistent.

> +                     chars = lambda l: ''.join(chr(x) for x in l)
> +             else:
> +                     chars = lambda l: ''.join(chr(x).decode('utf-8', 
> 'replace') for x in l)

This looks like an unrelated change. Was the original code buggy? If
this is the case, then please fix it in a separate commit.

>               if uc != ruc:
>                       msg.extend([
>                               "  %s -> %s" % (chars(lc), chars(ruc)),
> diff --git a/setup.py b/setup.py
> index 25429bc..8b6b408 100755
> --- a/setup.py
> +++ b/setup.py
> @@ -47,7 +47,11 @@ x_scripts = {
>  # Dictionary custom modules written in C/C++ here.  The structure is
>  #   key   = module name
>  #   value = list of C/C++ source code, path relative to top source directory
> -x_c_helpers = {}
> +x_c_helpers = {
> +     'portage_c_convert_case' : [
> +             'src/portage_c_convert_case.c',
> +     ],
> +}
>  
>  class x_build(build):
>       """ Build command with extra build_man call. """
> diff --git a/src/portage_c_convert_case.c b/src/portage_c_convert_case.c
> new file mode 100644
> index 0000000..f60b0c2
> --- /dev/null
> +++ b/src/portage_c_convert_case.c
> @@ -0,0 +1,94 @@
> +/* Copyright 2005-2016 Gentoo Foundation
> + * Distributed under the terms of the GNU General Public License v2
> + */
> +
> +#include <Python.h>
> +#include <ctype.h>
> +
> +static PyObject * portage_c_tolower(PyObject *, PyObject *);
> +static PyObject * portage_c_toupper(PyObject *, PyObject *);
> +
> +static PyMethodDef ConvertCaseMethods[] = {
> +     {"_c_tolower", portage_c_tolower, METH_VARARGS, "Convert to lower case 
> using system locale."},
> +     {"_c_toupper", portage_c_toupper, METH_VARARGS, "Convert to upper case 
> using system locale."},
> +     {NULL, NULL, 0, NULL}

You should include stdlib.h or stdio.h for NULL.

> +};
> +
> +#if PY_MAJOR_VERSION >= 3
> +static struct PyModuleDef moduledef = {
> +     PyModuleDef_HEAD_INIT,
> +     "portage_c_convert_case",                                       /* 
> m_name */
> +     "Module for converting case using the system locale",           /* 
> m_doc */
> +     -1,                                                             /* 
> m_size */
> +     ConvertCaseMethods,                                             /* 
> m_methods */
> +     NULL,                                                           /* 
> m_reload */
> +     NULL,                                                           /* 
> m_traverse */
> +     NULL,                                                           /* 
> m_clear */
> +     NULL,                                                           /* 
> m_free */
> +};
> +#endif
> +
> +static PyObject *ConvertCaseError;
> +
> +PyMODINIT_FUNC
> +#if PY_MAJOR_VERSION >= 3
> +PyInit_portage_c_convert_case(void)
> +#else
> +initportage_c_convert_case(void)
> +#endif
> +{
> +     PyObject *m;
> +
> +#if PY_MAJOR_VERSION >= 3
> +     m = PyModule_Create(&moduledef);
> +#else
> +     m = Py_InitModule("portage_c_convert_case", ConvertCaseMethods);
> +#endif
> +
> +     if (m == NULL)
> +#if PY_MAJOR_VERSION >= 3
> +             return NULL;
> +#else
> +             return;
> +#endif
> +
> +     ConvertCaseError = 
> PyErr_NewException("portage_c_convert_case.ConvertCaseError", NULL, NULL);
> +     Py_INCREF(ConvertCaseError);
> +     PyModule_AddObject(m, "ConvertCaseError", ConvertCaseError);
> +
> +#if PY_MAJOR_VERSION >= 3
> +     return m;
> +#else
> +     return;
> +#endif
> +}

To be honest, I think we'd be happier having two big #ifdefs for init
funcs rather than one function with a lot of #ifdefs, and the common
ConvertCaseError part in a separate static function.

> +
> +
> +static PyObject *
> +portage_c_tolower(PyObject *self, PyObject *args)
> +{
> +     int c;
> +
> +     if (!PyArg_ParseTuple(args, "i", &c))
> +     {
> +             PyErr_SetString(ConvertCaseError, "_c_tolower: PyArg_ParseTuple 
> failed");

From PyArg_ParseTuple() [1]:

| on failure, it returns false and raises the appropriate exception.

So I don't think you need or should use a custom exception here.

[1]:https://docs.python.org/2/c-api/arg.html#c.PyArg_ParseTuple

> +             return NULL;
> +     }
> +
> +     return Py_BuildValue("i", tolower(c));
> +}
> +
> +
> +static PyObject *
> +portage_c_toupper(PyObject *self, PyObject *args)
> +{
> +     int c;
> +
> +     if (!PyArg_ParseTuple(args, "i", &c))
> +     {
> +             PyErr_SetString(ConvertCaseError, "_c_toupper: PyArg_ParseTuple 
> failed");
> +             return NULL;
> +     }
> +
> +     return Py_BuildValue("i", toupper(c));
> +}

Thanks a lot for your effort. This is really getting in shape.

-- 
Best regards,
Michał Górny
<http://dev.gentoo.org/~mgorny/>

Attachment: pgp9y04POWsEG.pgp
Description: OpenPGP digital signature

Reply via email to