On 2026-02-19T12:52:00+0100, Alejandro Colomar wrote:
> Hi Ian,
>
> On 2026-02-19T10:30:09+0000, Ian Collier via Mutt-dev wrote:
> > On Thu, Feb 19, 2026 at 01:46:34AM +0100, Alejandro Colomar via Mutt-dev
> > wrote:
> > > On 2026-02-18T23:42:28+0000, Ian Collier via Mutt-dev wrote:
> > > > I do not understand why strchrnul and strcmp are even being invoked
> > > > here.
> >
> > > Because they are necessary. (Well, strcmp(3) is not, as you could do
> > > manual byte operations, but it's simple, more readable, and optimized
> > > out.) strchrnul(3) is necessary.
> >
> > > strchr(s, '\0') != NULL
> >
> > > would evaluate to true, which is a misbehavior: it would treat '\0' as
> > > white space, while it is not.
> >
> > The nul character is a bit of an edge case when considering strxxx
> > functions,
> > of course. But that's easily fixed:
> >
> > c && strchr(s, c) != NULL
> >
> > No doubt you'll now be telling me that you don't want to evaluate c twice
> > in the context of a macro. Well then, use an inline function. :-)
>
> An inline function with that would be an alternative. A macro is more
> compact, though, and can be implemented as a one-liner. When
> implementing several of these, it can help readability of the entire set
> of APIs if they fit in a compact space and they are very similar:
>
> // isascii_c - is [:ascii:] C-locale
> #define isascii_c(c) (!!strchr(CTYPE_ASCII_C, c))
> #define iscntrl_c(c) (!!strchr(CTYPE_CNTRL_C, c))
> #define islower_c(c) (!streq(strchrnul(CTYPE_LOWER_C, c), ""))
> #define isupper_c(c) (!streq(strchrnul(CTYPE_UPPER_C, c), ""))
> #define isdigit_c(c) (!streq(strchrnul(CTYPE_DIGIT_C, c), ""))
> #define ispunct_c(c) (!streq(strchrnul(CTYPE_PUNCT_C, c), ""))
> #define isspace_c(c) (!streq(strchrnul(CTYPE_SPACE_C, c), ""))
> #define isalpha_c(c) (!streq(strchrnul(CTYPE_ALPHA_C, c), ""))
> #define isalnum_c(c) (!streq(strchrnul(CTYPE_ALNUM_C, c), ""))
> #define isgraph_c(c) (!streq(strchrnul(CTYPE_GRAPH_C, c), ""))
> #define isprint_c(c) (!streq(strchrnul(CTYPE_PRINT_C, c), ""))
> #define isxdigit_c(c) (!streq(strchrnul(CTYPE_XDIGIT_C, c), ""))
>
> Here's a comparison of the assembly they produce:
>
> alx@devuan:~/tmp$ diff -u isspace?.c
> --- isspace1.c 2026-02-19 12:43:01.016632233 +0100
> +++ isspace2.c 2026-02-19 12:39:40.987744755 +0100
> @@ -1,8 +1,7 @@
> #include <string.h>
> #define CTYPE_SPACE_C " \t\n\v\f\r"
> -#define streq(a,b) (strcmp(a,b) == 0)
> bool
> isspace_c(int c)
> {
> - return !streq(strchrnul(CTYPE_SPACE_C, c), "");
> + return c && strchr(CTYPE_SPACE_C, c) != NULL;
> }
> alx@devuan:~/tmp$ gcc -S -O2 isspace?.c
> alx@devuan:~/tmp$ diff -u isspace?.s
> --- isspace1.s 2026-02-19 12:44:24.264776257 +0100
> +++ isspace2.s 2026-02-19 12:44:24.276776354 +0100
> @@ -1,4 +1,4 @@
> - .file "isspace1.c"
> + .file "isspace2.c"
> .text
> .section .rodata.str1.1,"aMS",@progbits,1
> .LC0:
> @@ -10,16 +10,23 @@
> isspace_c:
> .LFB0:
> .cfi_startproc
> + testl %edi, %edi
> + je .L5
> subq $8, %rsp
> .cfi_def_cfa_offset 16
> movl %edi, %esi
> leaq .LC0(%rip), %rdi
> - call strchrnul@PLT
> - cmpb $0, (%rax)
> + call strchr@PLT
> + testq %rax, %rax
> setne %al
> addq $8, %rsp
> .cfi_def_cfa_offset 8
> ret
> + .p2align 4,,10
> + .p2align 3
> +.L5:
> + xorl %eax, %eax
> + ret
> .cfi_endproc
> .LFE0:
> .size isspace_c, .-isspace_c
>
> The strchrnul(3) version seems to have significantly fewer instructions.
>
> And strchrnul(3) is internally simpler than strchr(3). In musl, for
> example, strchrnul(3) is used to implement strchr(3).
Here's the comparison between isspace_c() using non-system strchrnul()
vs the branchy implementation with strchr(3):
alx@devuan:~/tmp$ diff -u isspace[23].c
--- isspace2.c 2026-02-19 12:39:40.987744755 +0100
+++ isspace3.c 2026-02-19 14:26:39.575137468 +0100
@@ -1,7 +1,14 @@
#include <string.h>
#define CTYPE_SPACE_C " \t\n\v\f\r"
+#define streq(a,b) (strcmp(a,b) == 0)
+#define strnul(s) strchr(s, '\0')
+static inline char *
+my_strchrnul(const char *s, int c)
+{
+ return strchr(s, c) ?: strnul(s);
+}
bool
isspace_c(int c)
{
- return c && strchr(CTYPE_SPACE_C, c) != NULL;
+ return !streq(my_strchrnul(CTYPE_SPACE_C, c), "");
}
alx@devuan:~/tmp$ gcc -S -O2 isspace[23].c
alx@devuan:~/tmp$ diff -u isspace[23].s
--- isspace2.s 2026-02-19 14:27:20.193097036 +0100
+++ isspace3.s 2026-02-19 14:27:20.205097129 +0100
@@ -1,4 +1,4 @@
- .file "isspace2.c"
+ .file "isspace3.c"
.text
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
@@ -8,27 +8,25 @@
.globl isspace_c
.type isspace_c, @function
isspace_c:
-.LFB0:
+.LFB1:
.cfi_startproc
- testl %edi, %edi
- je .L5
+ movl %edi, %esi
subq $8, %rsp
.cfi_def_cfa_offset 16
- movl %edi, %esi
leaq .LC0(%rip), %rdi
call strchr@PLT
- testq %rax, %rax
+ movq %rax, %rdx
+ xorl %eax, %eax
+ testq %rdx, %rdx
+ je .L1
+ cmpb $0, (%rdx)
setne %al
+.L1:
addq $8, %rsp
.cfi_def_cfa_offset 8
ret
- .p2align 4,,10
- .p2align 3
-.L5:
- xorl %eax, %eax
- ret
.cfi_endproc
-.LFE0:
+.LFE1:
.size isspace_c, .-isspace_c
.ident "GCC: (Debian 15.2.0-13) 15.2.0"
.section .note.GNU-stack,"",@progbits
I expect the branchy strchr(3) version (isspace2.c) would be faster, since
its branch is rarely taken, and its main body is smaller. However, I wouldn't worry
about performance in systems that don't implement strchrnul(3); those
are rare, and the difference would be small-ish. Let the systems
implement strchrnul(3) if they care about performance.
Cheers,
Alex
--
<https://www.alejandro-colomar.es>
signature.asc
Description: PGP signature
