[PATCH] tests: fix test-execute with GNU make jobserver

2021-04-06 Thread Dmitry V. Levin
On POSIX systems the GNU make jobserver is implemented as a pipe,
and these two unexpected descriptors make test-execute-child fail.

Workaround this by making all unexpected descriptors cloexec in
test-execute-main so they are not inherited by test-execute-child.

* tests/test-execute-main.c (cloexec_fds): New function.
(main): Use it.
---
OK to commit?
 ChangeLog |  6 ++
 tests/test-execute-main.c | 18 ++
 2 files changed, 24 insertions(+)

diff --git a/ChangeLog b/ChangeLog
index 46496bc75..e65a96e8b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2021-04-07  Dmitry V. Levin  
+
+   tests: fix test-execute with GNU make jobserver.
+   * tests/test-execute-main.c (cloexec_fds): New function.
+   (main): Use it.
+
 2021-04-06  Paul Eggert  
 
group-member: minor tweak to omit a *
diff --git a/tests/test-execute-main.c b/tests/test-execute-main.c
index 39e24af12..62f76de43 100644
--- a/tests/test-execute-main.c
+++ b/tests/test-execute-main.c
@@ -47,6 +47,21 @@
 
 #define BASE "test-execute"
 
+static void
+cloexec_fds(int first, int last)
+{
+  int fd;
+  for (fd = first; fd <= last; ++fd) {
+int dup_fd;
+dup_fd = fcntl (fd, F_DUPFD_CLOEXEC, 0);
+if (dup_fd >= 0) {
+  ASSERT (close (fd) == 0);
+  ASSERT (fcntl (dup_fd, F_DUPFD_CLOEXEC, fd) == fd);
+  ASSERT (close (dup_fd) == 0);
+}
+  }
+}
+
 int
 main (int argc, char *argv[])
 {
@@ -288,6 +303,7 @@ main (int argc, char *argv[])
 case 14:
   {
 /* Check file descriptors >= 3 can be inherited.  */
+cloexec_fds(3, 19);
 ASSERT (dup2 (STDOUT_FILENO, 10) >= 0);
 const char *prog_argv[3] = { prog_path, "14", NULL };
 int ret = execute (progname, prog_argv[0], prog_argv, NULL,
@@ -298,6 +314,7 @@ main (int argc, char *argv[])
 case 15:
   {
 /* Check file descriptors >= 3 can be inherited.  */
+cloexec_fds(3, 19);
 ASSERT (fcntl (STDOUT_FILENO, F_DUPFD, 10) >= 0);
 const char *prog_argv[3] = { prog_path, "15", NULL };
 int ret = execute (progname, prog_argv[0], prog_argv, NULL,
@@ -308,6 +325,7 @@ main (int argc, char *argv[])
 case 16:
   {
 /* Check file descriptors >= 3 with O_CLOEXEC bit are not inherited.  
*/
+cloexec_fds(3, 19);
 ASSERT (fcntl (STDOUT_FILENO, F_DUPFD_CLOEXEC, 10) >= 0);
 const char *prog_argv[3] = { prog_path, "16", NULL };
 int ret = execute (progname, prog_argv[0], prog_argv, NULL,
-- 
ldv



[PATCH 1/3] xalloc-oversized: export xalloc_count_t

2021-04-06 Thread Paul Eggert
* lib/xalloc-oversized.h (__xalloc_oversized, xalloc_oversized):
* lib/xmalloca.h (nmalloca):
Comment re restrictions on arg types.
* lib/xalloc-oversized.h (xalloc_count_t): Rename from
__xalloc_count_type; all uses changed.  This publicizes the type.
---
 ChangeLog  |  9 +
 lib/malloca.h  |  1 +
 lib/xalloc-oversized.h | 24 
 lib/xmalloca.h |  3 ++-
 4 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index f3cca1ab2..0c3ea48fe 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2021-04-06  Paul Eggert  
+
+   xalloc-oversized: export xalloc_count_t
+   * lib/xalloc-oversized.h (__xalloc_oversized, xalloc_oversized):
+   * lib/xmalloca.h (nmalloca):
+   Comment re restrictions on arg types.
+   * lib/xalloc-oversized.h (xalloc_count_t): Rename from
+   __xalloc_count_type; all uses changed.  This publicizes the type.
+
 2021-04-05  Paul Eggert  
 
xalloc: try to pacify gcc -Wsign-compare
diff --git a/lib/malloca.h b/lib/malloca.h
index 017812d07..16a156ba2 100644
--- a/lib/malloca.h
+++ b/lib/malloca.h
@@ -77,6 +77,7 @@ extern void freea (void *p);
 /* nmalloca(N,S) is an overflow-safe variant of malloca (N * S).
It allocates an array of N objects, each with S bytes of memory,
on the stack.  S must be positive and N must be nonnegative.
+   Either N or S should be of type ptrdiff_t or size_t or wider.
The array must be freed using freea() before the function returns.  */
 #define nmalloca(n, s) (xalloc_oversized (n, s) ? NULL : malloca ((n) * (s)))
 
diff --git a/lib/xalloc-oversized.h b/lib/xalloc-oversized.h
index 53daf5966..3618c75cb 100644
--- a/lib/xalloc-oversized.h
+++ b/lib/xalloc-oversized.h
@@ -21,34 +21,34 @@
 #include 
 #include 
 
-/* True if N * S would overflow in a size_t calculation,
-   or would generate a value larger than PTRDIFF_MAX.
+/* True if N * S does not fit into both ptrdiff_t and size_t.
+   S must be positive and N must be nonnegative.
This expands to a constant expression if N and S are both constants.
-   By gnulib convention, SIZE_MAX represents overflow in size
+   By gnulib convention, SIZE_MAX represents overflow in size_t
calculations, so the conservative size_t-based dividend to use here
is SIZE_MAX - 1.  */
 #define __xalloc_oversized(n, s) \
   ((size_t) (PTRDIFF_MAX < SIZE_MAX ? PTRDIFF_MAX : SIZE_MAX - 1) / (s) < (n))
 
 #if PTRDIFF_MAX < SIZE_MAX
-typedef ptrdiff_t __xalloc_count_type;
+typedef ptrdiff_t xalloc_count_t;
 #else
-typedef size_t __xalloc_count_type;
+typedef size_t xalloc_count_t;
 #endif
 
-/* Return 1 if an array of N objects, each of size S, cannot exist
-   reliably due to size or ptrdiff_t arithmetic overflow.  S must be
-   positive and N must be nonnegative.  This is a macro, not a
-   function, so that it works correctly even when SIZE_MAX < N.  */
-
+/* Return 1 if an array of N objects, each of size S, cannot exist reliably
+   because its total size in bytes exceeds MIN (PTRDIFF_MAX, SIZE_MAX).
+   N must be nonnegative, S must be positive, and either N or S should be
+   of type ptrdiff_t or size_t or wider.  This is a macro, not a function,
+   so that it works even if an argument exceeds MAX (PTRDIFF_MAX, SIZE_MAX).  
*/
 #if 7 <= __GNUC__ && !defined __clang__
 # define xalloc_oversized(n, s) \
-   __builtin_mul_overflow_p (n, s, (__xalloc_count_type) 1)
+   __builtin_mul_overflow_p (n, s, (xalloc_count_t) 1)
 #elif 5 <= __GNUC__ && !defined __ICC && !__STRICT_ANSI__
 # define xalloc_oversized(n, s) \
(__builtin_constant_p (n) && __builtin_constant_p (s) \
 ? __xalloc_oversized (n, s) \
-: ({ __xalloc_count_type __xalloc_count; \
+: ({ xalloc_count_t __xalloc_count; \
  __builtin_mul_overflow (n, s, &__xalloc_count); }))
 
 /* Other compilers use integer division; this may be slower but is
diff --git a/lib/xmalloca.h b/lib/xmalloca.h
index cbaa89d73..a87a336b6 100644
--- a/lib/xmalloca.h
+++ b/lib/xmalloca.h
@@ -45,7 +45,8 @@ extern void * xmmalloca (size_t n);
 
 /* xnmalloca(N,S) is an overflow-safe variant of xmalloca (N * S).
It allocates an array of N objects, each with S bytes of memory,
-   on the stack.  S must be positive and N must be nonnegative.
+   on the stack.  S must be positive and N must be nonnegative,
+   and at least one of N and S should be ptrdiff_t or size_t or wider.
The array must be freed using freea() before the function returns.
Upon failure, it exits with an error message.  */
 #if HAVE_ALLOCA
-- 
2.27.0




[PATCH 2/3] backupfile: less-aggressive buffer growth

2021-04-06 Thread Paul Eggert
* lib/backupfile.c: Include intprops.h.
(numbered_backup): Grow buffer by the usual 50%, not 100%.
This is easier to do now that we have xalloc_count_t.
* modules/backup-rename, modules/backupfile: Depend on intprops.
---
 ChangeLog | 6 ++
 lib/backupfile.c  | 6 --
 modules/backup-rename | 1 +
 modules/backupfile| 1 +
 4 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 0c3ea48fe..c68da0df8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,11 @@
 2021-04-06  Paul Eggert  
 
+   backupfile: less-aggressive buffer growth
+   * lib/backupfile.c: Include intprops.h.
+   (numbered_backup): Grow buffer by the usual 50%, not 100%.
+   This is easier to do now that we have xalloc_count_t.
+   * modules/backup-rename, modules/backupfile: Depend on intprops.
+
xalloc-oversized: export xalloc_count_t
* lib/xalloc-oversized.h (__xalloc_oversized, xalloc_oversized):
* lib/xmalloca.h (nmalloca):
diff --git a/lib/backupfile.c b/lib/backupfile.c
index c53e3f335..1e427e8de 100644
--- a/lib/backupfile.c
+++ b/lib/backupfile.c
@@ -34,6 +34,7 @@
 #include "attribute.h"
 #include "basename-lgpl.h"
 #include "idx.h"
+#include "intprops.h"
 #include "opendirat.h"
 #include "renameatu.h"
 #include "xalloc-oversized.h"
@@ -270,8 +271,9 @@ numbered_backup (int dir_fd, char **buffer, size_t 
buffer_size, size_t filelen,
   size_t new_buffer_size = filelen + 2 + versionlenmax + 2;
   if (buffer_size < new_buffer_size)
 {
-  if (! xalloc_oversized (new_buffer_size, 2))
-new_buffer_size *= 2;
+  xalloc_count_t grown;
+  if (! INT_ADD_WRAPV (new_buffer_size, new_buffer_size >> 1, ))
+new_buffer_size = grown;
   char *new_buf = realloc (buf, new_buffer_size);
   if (!new_buf)
 {
diff --git a/modules/backup-rename b/modules/backup-rename
index 4497b3350..c50e874ff 100644
--- a/modules/backup-rename
+++ b/modules/backup-rename
@@ -17,6 +17,7 @@ closedir
 d-ino
 fcntl-h
 idx
+intprops
 memcmp
 opendirat
 readdir
diff --git a/modules/backupfile b/modules/backupfile
index 41cf99b02..42c8c9ed5 100644
--- a/modules/backupfile
+++ b/modules/backupfile
@@ -17,6 +17,7 @@ closedir
 d-ino
 fcntl-h
 idx
+intprops
 memcmp
 opendirat
 readdir
-- 
2.27.0




[PATCH 3/3] group-member: minor tweak to omit a *

2021-04-06 Thread Paul Eggert
* lib/group-member.c: Include intprops.h.
(get_group_info): Use INT_MULTIPLY_WRAPV instead of
xalloc_oversized (which does a multiplication) followed by the
same multiplication.  The code was OK as-is; this is just
conceptual simplification, possible now that we have xalloc_count_t.
* modules/group-member: Depend on intprops.
---
 ChangeLog| 8 
 lib/group-member.c   | 6 --
 modules/group-member | 1 +
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index c68da0df8..46496bc75 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
 2021-04-06  Paul Eggert  
 
+   group-member: minor tweak to omit a *
+   * lib/group-member.c: Include intprops.h.
+   (get_group_info): Use INT_MULTIPLY_WRAPV instead of
+   xalloc_oversized (which does a multiplication) followed by the
+   same multiplication.  The code was OK as-is; this is just
+   conceptual simplification, possible now that we have xalloc_count_t.
+   * modules/group-member: Depend on intprops.
+
backupfile: less-aggressive buffer growth
* lib/backupfile.c: Include intprops.h.
(numbered_backup): Grow buffer by the usual 50%, not 100%.
diff --git a/lib/group-member.c b/lib/group-member.c
index 52159016e..17bee831b 100644
--- a/lib/group-member.c
+++ b/lib/group-member.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 
+#include "intprops.h"
 #include "xalloc-oversized.h"
 
 /* Most processes have no more than this many groups, and for these
@@ -53,10 +54,11 @@ get_group_info (struct group_info *gi)
   if (n_groups < 0)
 {
   int n_group_slots = getgroups (0, NULL);
+  xalloc_count_t nbytes;
   if (0 <= n_group_slots
-  && ! xalloc_oversized (n_group_slots, sizeof *gi->group))
+  && ! INT_MULTIPLY_WRAPV (n_group_slots, sizeof *gi->group, ))
 {
-  gi->group = malloc (n_group_slots * sizeof *gi->group);
+  gi->group = malloc (nbytes);
   if (gi->group)
 n_groups = getgroups (n_group_slots, gi->group);
 }
diff --git a/modules/group-member b/modules/group-member
index 1b743a33b..aa56ecf7e 100644
--- a/modules/group-member
+++ b/modules/group-member
@@ -9,6 +9,7 @@ Depends-on:
 unistd
 extensions
 getgroups[test $HAVE_GROUP_MEMBER = 0]
+intprops [test $HAVE_GROUP_MEMBER = 0]
 xalloc-oversized [test $HAVE_GROUP_MEMBER = 0]
 
 configure.ac:
-- 
2.27.0




Re: cast macros

2021-04-06 Thread Bruno Haible
I wrote:
> So far we have been lacking type-casts that warn for invalid use;
> it is well possible that with the PSPP macros (or with Marc's approach
> of _Generic), we can design a type-safe wrapper around gl_list.h

Here is a simplified test case: The simplest container type is a box type,
that contain just one value. Can one of you complete this code, so that
it produces the warnings / no warnings as indicated?


typedef const void ** gl_box_t;

extern gl_box_t create_box (const void *);
extern const void *box_get_value (gl_box_t);

#include "cast.h"

/* CAST_TO_FROM (TO, FROM, EXPR)
   gives a warning if EXPR is not of type FROM,
   and returns EXPR, cast to type TO.  */

struct foo;
#define ELEMENT_TYPE struct foo *
#define CREATE_BOX(V) \
  create_box (CAST_TO_FROM (const void *, ELEMENT_TYPE, V))
#define BOX_GET_VALUE(BOX) \
  CAST_TO_FROM (ELEMENT_TYPE, const void *, box_get_value (BOX))

struct foo *a;
struct foo *b;
const void *c;
void *d;
int *i;

int
main ()
{
  (void) CAST_TO_FROM (int *, const void *, c); // no warning
  (void) CAST_TO_FROM (int *, void *, d);   // no warning
  (void) CAST_TO_FROM (int *, const void *, d); // no warning
  (void) CAST_TO_FROM (int *, void *, c);   // warning
  (void) CAST_TO_FROM (int *, void *, i);   // warning
  (void) CAST_TO_FROM (int *, char *, i);   // warning

  gl_box_t box = CREATE_BOX (a);// no warning
  return BOX_GET_VALUE (box) == b;  // no warning
}





Re: gl_list API

2021-04-06 Thread Marc Nieper-Wißkirchen
Am Di., 6. Apr. 2021 um 23:04 Uhr schrieb Bruno Haible :

> > > But when designing a general utility, for all kinds of programs to use,
> > > it is inappropriate to say "storing null elements is not useful".
> >
> > I'm afraid we'll have to disagree here.
>
> In some of my uses of the gl_list module, the element is in fact a small
> integer, cast to an 'uintptr_t' and then further cast to a 'const void *'.
> If the API would forbid storing a NULL pointer, my specialized container
> type would need a workaround for storing the integer value 0.
>

NULL can also be a sensible value for "holes" in the list showing up in
algorithms that need to lazily remove elements (e.g. the list will only be
eventually compactified).


Re: Type-safe typecasts

2021-04-06 Thread Bruno Haible
Marc Nieper-Wißkirchen wrote:

> gl_list_iterator_t i = gl_list_iterator (list);
> struct foo *elt;
> while (gl_list_iterator_next (, (const void **) , NULL))
>   ++elt->bar;

This cast is dangerous: It silences the warning "passing argument 2 of
'gl_list_iterator_next' from incompatible pointer type", and is (AFAICS)
a violation of the C strict-aliasing rule [1].

> So to make my original code portable C, I would have to code
> 
> ...
> const void *e;
> while (gl_list_iterator_next (, , NULL))
>   {
> struct foo *foo = (void *) e;
> ++foo->bar;
>   }

Right, this is the way it should be written.

> The const typecast is, unfortunately, still needed to silence compiler
> warnings as the Gnulib list API suffers a bit from const-poisoning when the
> actual elements are pointers actually non-const objects.

Yes. The element type could be 'const void *' everywhere or 'void *'
everywhere. Since the generic list code does not write to these pointers,
I chose 'const void *'.

Bruno

[1] https://stackoverflow.com/questions/98650/what-is-the-strict-aliasing-rule




Re: replacement for 'join'?

2021-04-06 Thread Bruno Haible
Paul Eggert wrote:
> Yes, 'awk' is the way to go if you want more 'join' capability (not just 
> treating the entire line as the key). But if the data are not large and 
> each key takes up the whole line, 'grep' should work fine.

The data is large, unfortunately. The two input files contain lists
of symbols, when constructing a shared library. GNU libunistring has
ca. 250 symbols.

For small data, I would have transformed the first file to a 'sed'
script, that I would then apply to the second file. But HP-UX 'sed'
has a limit of 100 -e expressions per invocation.

So, the solution with 'awk' is the best I can see. Thanks again, Berny!

Bruno




Re: gl_list API

2021-04-06 Thread Bruno Haible
> > But when designing a general utility, for all kinds of programs to use,
> > it is inappropriate to say "storing null elements is not useful".
> 
> I'm afraid we'll have to disagree here.

In some of my uses of the gl_list module, the element is in fact a small
integer, cast to an 'uintptr_t' and then further cast to a 'const void *'.
If the API would forbid storing a NULL pointer, my specialized container
type would need a workaround for storing the integer value 0.

Bruno





Re: Type-safe typecasts

2021-04-06 Thread Bruno Haible
Hi Ben,

> I've found a few macros for casts useful over years. PSPP uses
> CONST_CAST and UP_CAST from the file below quite a bit:
> https://git.savannah.gnu.org/cgit/pspp.git/tree/src/libpspp/cast.h

That's pointing to a nice solution to the problem that casts don't warn in C.
I use to write code like

   void *p = ...;
   int *q = (int *) p;

because I want the reader of the program to be aware that there is a cast.

Paul uses to write code like

   void *p = ...;
   int *q = p;

because he wants the compiler to issue warnings if the type of p is changed
to something else.

It would be cool to have the advantages of both approaches:

   void *p = ...;
   int *q = SAFE_CAST (int *, p);

that would indicate that there is a cast AND warn if 'int *' and 'void *'
are not compatible.  Can you implement SAFE_CAST with the techniques from
PSPP, or is Marc's approach needed?

Btw, I like Marc's approach of using _Generic for warning purposes on newer
compilers, and still maintain compatibility with older compilers.

Bruno




Re: gl_list API

2021-04-06 Thread Paul Eggert

On 4/6/21 1:28 PM, Bruno Haible wrote:

But when designing a general utility, for all kinds of programs to use,
it is inappropriate to say "storing null elements is not useful".


I'm afraid we'll have to disagree here. But then I don't use the API so 
my comments aren't all that important.




Re: gl_list API

2021-04-06 Thread Bruno Haible
Paul Eggert wrote:
> > gl_list_iterator_next has to return two things: An element (represented by
> > a const void *) and a boolean value. As elements may be NULL
> 
> Ah, OK, then that's the problem. The API shouldn't allow null elements. 
> They're not that useful anyway.

I disagree very much with this. When you write a list type for use within
a particular program, you can say "storing null elements is not useful".
But when designing a general utility, for all kinds of programs to use,
it is inappropriate to say "storing null elements is not useful". The
program's developer decides about the representation of their data, and
if they find that NULL is a perfect representation for something, then
that's their valid choice.

> If they really are needed for some 
> lists, I suppose one could have a more-complicated API to return them 
> (by setting a bool elsewhere); but usually they aren't.

I wanted to avoid this when designing the gl_list API: to have one
API for the case that disallows NULLs, and another — more complex — API
for that case that allows NULLs. One API function for one thing. No
redundancies.

> > The const typecast is, unfortunately, still needed to silence compiler
> > warnings
> 
> Yes, that would be portable. But that cast indicates another problem 
> with the API. It should return void *, not void const * (think of strchr 
> as the model here).

strchr is a bad model here: You pass it a pointer to a read-only storage
(such as a literal string) and receive a pointer that you "can" write
to: The compiler cannot warn about
   strchr ("literal", 'e') [2] = 'x';

> The API should discourage type-casts, since they're 
> more dangerous than the alternatives.

You can't write generic containers in C that don't need type-casts,
except by adding wrapper macros that do the type casts for the user.

So far we have been lacking type-casts that warn for invalid use;
it is well possible that with the PSPP macros (or with Marc's approach
of _Generic), we can design a type-safe wrapper around gl_list.h
that is parameterized by e.g.
  #define ELEMENT_TYPE  struct foo *
  #define LIST_TYPE foo_list_t
  #define PREFIXfl_
and that does warning-free but safe casts between 'struct foo *' and
'const void *'.

That's the direction we shoud go. Not backwards by using bad models
like strchr().

Bruno




Re: New function xpalloc in module xalloc

2021-04-06 Thread Paul Eggert

On 4/6/21 12:23 PM, Marc Nieper-Wißkirchen wrote:

Where is the flaw in my reasoning?


Oh, you're right; any nonnegative signed integer value will fit into 
uintmax_t. (Perhaps this wasn't always true in older standards, but it's 
true of the recent C standard.)


So that cast should work. Still, I wouldn't use a cast; I'd assign the 
integer to an uintmax_t local value, and use that.


And anyway, for this particular case widening to uintmax_t is not 
necessary; it complicates the code and can make it less efficient on 
some platforms by forcing an unnecessary conversion at runtime. So let's 
not do that.




Re: Type-safe typecasts

2021-04-06 Thread Bruno Haible
Ben Pfaff wrote:
> I've found a few macros for casts useful over years. PSPP uses
> CONST_CAST and UP_CAST from the file below quite a bit:
> https://git.savannah.gnu.org/cgit/pspp.git/tree/src/libpspp/cast.h

Other projects may want to use these macros as well.
That would make a great contribution to Gnulib!

Bruno




Re: Type-safe typecasts

2021-04-06 Thread Marc Nieper-Wißkirchen
CCing Bruno because of his involvement with the Gnulib list modules.

Disallowing NULL list elements could break existing code that actually uses
them but returning elements with type void * instead of const void * would
be much less incompatible. Code can trivially be ported to such an updated
interface (for example, in my snippet above, I would have to replace 'const
void *e' with 'void *e', All in all, it would make working with the code a
lot easier.

For lists where there are no non-null list elements, one could add

void *gl_list_iterator_next_element_or_null (gl_list_iterator_t iter,
gl_list_node_t *node_ptr)
{
  void *e;
  return gl_list_iterator_next (iter, , node_ptr) ? e : NULL;
}

to the global API (modulo a better name for the procedure).


Am Di., 6. Apr. 2021 um 21:20 Uhr schrieb Paul Eggert :

> On 4/6/21 12:13 PM, Marc Nieper-Wißkirchen wrote:
> > gl_list_iterator_next has to return two things: An element (represented
> by
> > a const void *) and a boolean value. As elements may be NULL
>
> Ah, OK, then that's the problem. The API shouldn't allow null elements.
> They're not that useful anyway. If they really are needed for some
> lists, I suppose one could have a more-complicated API to return them
> (by setting a bool elsewhere); but usually they aren't.
>
> > So to make my original code portable C, I would have to code
> >
> > ...
> > const void *e;
> > while (gl_list_iterator_next (, , NULL))
> >   {
> > struct foo *foo = (void *) e;
> > ++foo->bar;
> >   }
> > ...
> >
> > The const typecast is, unfortunately, still needed to silence compiler
> > warnings
>
> Yes, that would be portable. But that cast indicates another problem
> with the API. It should return void *, not void const * (think of strchr
> as the model here). The API should discourage type-casts, since they're
> more dangerous than the alternatives.
>


Re: Type-safe typecasts

2021-04-06 Thread Ben Pfaff
On Tue, Apr 6, 2021 at 12:18 AM Marc Nieper-Wißkirchen
 wrote:
> I have been wondering whether it makes sense to add a small utility trying to 
> make typecasts in C as type-safe as possible.

I've found a few macros for casts useful over years. PSPP uses
CONST_CAST and UP_CAST from the file below quite a bit:
https://git.savannah.gnu.org/cgit/pspp.git/tree/src/libpspp/cast.h



Re: New function xpalloc in module xalloc

2021-04-06 Thread Marc Nieper-Wißkirchen
Am Di., 6. Apr. 2021 um 21:05 Uhr schrieb Paul Eggert :

> On 4/5/21 11:48 PM, Marc Nieper-Wißkirchen wrote:
> > SIZE_MAX < (uintmax_t) nbytes
>
> Eeuuw! That's a cure worse than the disease. Among other things, there
> is no guarantee that PTRDIFF_MAX <= UINTMAX_MAX so in theory the
> comparison could go completely awry with a sufficiently-large NBYTES.
>

I checked the ISO C18 standard before I wrote this. :) In 6.2.5 it says
that for every signed type there is an unsigned type of the same width.
Given the definitions of INTMAX_MAX and UINTMAX_MAX, I then concluded

PTRDIFF_MAX <= INTMAX_MAX <= UINTMAX_MAX.

Where is the flaw in my reasoning?


Re: Type-safe typecasts

2021-04-06 Thread Paul Eggert

On 4/6/21 12:13 PM, Marc Nieper-Wißkirchen wrote:

gl_list_iterator_next has to return two things: An element (represented by
a const void *) and a boolean value. As elements may be NULL


Ah, OK, then that's the problem. The API shouldn't allow null elements. 
They're not that useful anyway. If they really are needed for some 
lists, I suppose one could have a more-complicated API to return them 
(by setting a bool elsewhere); but usually they aren't.



So to make my original code portable C, I would have to code

...
const void *e;
while (gl_list_iterator_next (, , NULL))
  {
struct foo *foo = (void *) e;
++foo->bar;
  }
...

The const typecast is, unfortunately, still needed to silence compiler
warnings


Yes, that would be portable. But that cast indicates another problem 
with the API. It should return void *, not void const * (think of strchr 
as the model here). The API should discourage type-casts, since they're 
more dangerous than the alternatives.




Re: Type-safe typecasts

2021-04-06 Thread Marc Nieper-Wißkirchen
Hi Paul,

thanks!

By the way, the snippet you gave is not portable C code, as it assumes
> that 'void *' and 'struct foo *' have the same machine representation.
> This is not necessarily true on (admittedly now-rare) machines that have
> different flavors of pointers. I suspect the main problem here is either
> in the calling code, or in the API for gl_list_iterator_next: if it
> returned a possibly-null value of type 'const void *' instead of storing
> the result through a 'const void **' pointer, the calling code wouldn't
> have gotten into this portability mess.
>

gl_list_iterator_next has to return two things: An element (represented by
a const void *) and a boolean value. As elements may be NULL, the boolean
value is really needed. Of course, one could switch the actual return
parameter with the one returned through a pointer but then it wouldn't look
as nice in while or for-loops.

So to make my original code portable C, I would have to code

...
const void *e;
while (gl_list_iterator_next (, , NULL))
  {
struct foo *foo = (void *) e;
++foo->bar;
  }
...

The const typecast is, unfortunately, still needed to silence compiler
warnings as the Gnulib list API suffers a bit from const-poisoning when the
actual elements are pointers actually non-const objects.

Marc


Re: New function xpalloc in module xalloc

2021-04-06 Thread Paul Eggert

On 4/5/21 11:48 PM, Marc Nieper-Wißkirchen wrote:

SIZE_MAX < (uintmax_t) nbytes


Eeuuw! That's a cure worse than the disease. Among other things, there 
is no guarantee that PTRDIFF_MAX <= UINTMAX_MAX so in theory the 
comparison could go completely awry with a sufficiently-large NBYTES.


"Don't use casts." Words to live by.



Re: Type-safe typecasts

2021-04-06 Thread Paul Eggert

On 4/6/21 12:18 AM, Marc Nieper-Wißkirchen wrote:

So what I have in mind are macros that do a type conversion from A to B and
that signal an error on modern compilers if the input is not of type A. For
this, the C11 construct _Generic can be used.


Not sure it's worth the aggravation. Most of the time, the type you're 
converting to void * is obvious from context. (However, I guess I 
wouldn't object in code that I don't have to look at myself. :-)


By the way, the snippet you gave is not portable C code, as it assumes 
that 'void *' and 'struct foo *' have the same machine representation. 
This is not necessarily true on (admittedly now-rare) machines that have 
different flavors of pointers. I suspect the main problem here is either 
in the calling code, or in the API for gl_list_iterator_next: if it 
returned a possibly-null value of type 'const void *' instead of storing 
the result through a 'const void **' pointer, the calling code wouldn't 
have gotten into this portability mess.




Re: replacement for 'join'?

2021-04-06 Thread Paul Eggert

On 4/6/21 2:40 AM, Bernhard Voelker wrote:

grep -Fvf file2 file1

I started with that, too, but it is problematic because:
a) it doesn't do a whole-word search ... and 'grep -w' seems not to be portable,
b) it doesn't limit the matching on the key field.
And messing with regular expressions seems to be fragile as well, because the
key field may contain problematic characters.


Yes, 'awk' is the way to go if you want more 'join' capability (not just 
treating the entire line as the key). But if the data are not large and 
each key takes up the whole line, 'grep' should work fine.


Regular expressions are not a problem, as the -F option disables them.



Re: replacement for 'join'?

2021-04-06 Thread Bernhard Voelker
On 4/6/21 4:24 AM, Paul Eggert wrote:
> grep -Fvf file2 file1

I started with that, too, but it is problematic because:
a) it doesn't do a whole-word search ... and 'grep -w' seems not to be portable,
b) it doesn't limit the matching on the key field.
And messing with regular expressions seems to be fragile as well, because the
key field may contain problematic characters.

Have a nice day,
Berny



Type-safe typecasts

2021-04-06 Thread Marc Nieper-Wißkirchen
Hi,

I have been wondering whether it makes sense to add a small utility trying
to make typecasts in C as type-safe as possible.

The problem is that typecasts are sometimes unavoidable.  For an example,
let's take a look at the following snippet using Gnulib's xlist module:

struct foo
{
  int bar;
};
gl_list_t list = gl_list_create_empty (GL_ARRAY_LIST, NULL, NULL, NULL,
false);
struct foo foo = { .bar = 1 };
gl_list_add_last (list, );
gl_list_iterator_t i = gl_list_iterator (list);
struct foo *elt;
while (gl_list_iterator_next (, (const void **) , NULL))
  ++elt->bar;
gl_list_iterator_free ();

Here, the typecast '(const void **)' is needed as 'gl_list_iterator_next'
expects an argument of that type and because 'struct foo **elt' is not
implicitly convertible to 'const void **' (in fact, even not to 'void **').

The problem with this typecast is that the compiler wouldn't have
complained if I had forgotten to write the address operator '&' before
'elt'.

So what I have in mind are macros that do a type conversion from A to B and
that signal an error on modern compilers if the input is not of type A. For
this, the C11 construct _Generic can be used.

The macros could look like

CAST (, , )

CVR_CAST (, expr) /* removes cvr qualifier */

ADDRESS_CAST (, , expr)

expanding, respectively, into

_Generic ((), (): (() ()))

_Generic ((), (): (() ()),
(const ): (( ()),
(const volatile ): (() ()),
...)

_Generic ((), (): ((() *) &()))

The names and the exact semantics and number of macros are, of course,
debatable.

Marc


Re: New function xpalloc in module xalloc

2021-04-06 Thread Marc Nieper-Wißkirchen
Hi Paul,

Am Di., 6. Apr. 2021 um 05:19 Uhr schrieb Paul Eggert :

> On 4/3/21 11:17 PM, Marc Nieper-Wißkirchen wrote:
> > Does the comparison make any sense, by the way?
>
> Yes, although it's needed only on unusual (and these days perhaps
> theoretical?) platforms where SIZE_MAX < PTRDIFF_MAX.
>

Ah, okay. I didn't think of this possibility.


> I hadn't noticed the issue, as the projects I contribute to (coreutils,
> etc.) compile with -Wno-sign-compare because gcc -Wsign-compare has too
> many false alarms.
>
> I prefer to avoid casts merely to pacify GCC (as casts are too
> error-prone), so I installed the attached. I hope it works for you. (If
> not, perhaps you can use -Wno-sign-compare too)
>

The fix works for me. Thank you very much! IMO, it's much better than
asking to compile with "-Wno-sign-compare' because this can (like type
casts) silence other, non-false positive warnings.

Speaking of type casts, I don't think they would have been bad here because
they would document exactly what was going on here. By writing

SIZE_MAX < (uintmax_t) nbytes

the otherwise implicit type conversion would have been made explicit and
using 'uintmax_t' also documents that it is expected that the width of
'nbytes' can be greater than the width of 'size_t'.


> This underscores the fact that the xalloc module should use idx_t
> instead of size_t pretty much everywhere. If xrealloc's size arg were of
> idx_t we wouldn't need any of this hacking. I realize that replacing
> size_t with idx_t is an incompatible change to xalloc's API, but it's
> time callers started using signed instead of unsigned byte counts as
> that helps avoid and/or catch integer-overflow errors better. I'll add
> that to my list of things to do for Gnulib.
>

The philosophy about idx_t could be worth an entry in the manual.

Marc