Author: brane
Date: Sun Jul 27 22:44:04 2025
New Revision: 1927498
Log:
On the user-defined-authn branch: Remove the character classification
table. It turns out that range checks are faster than table lookups.
Which I should have known.
* src/genctype.py: Removed.
* src/syntax.c:
- Declare string functions through <apr_want.h>
- Remove the character classification table and macros.
(ct_isalnum): New; finds ASCII digits, uppercase and lowercase letters.
(ct_istoken, ct_istoken68): Reimplement with range checks.
(skip_space): Likewise; use strspn instead.
Deleted:
serf/branches/user-defined-authn/src/genctype.py
Modified:
serf/branches/user-defined-authn/src/syntax.c
Modified: serf/branches/user-defined-authn/src/syntax.c
==============================================================================
--- serf/branches/user-defined-authn/src/syntax.c Sun Jul 27 20:10:02 2025 (r1927497)
+++ serf/branches/user-defined-authn/src/syntax.c Sun Jul 27 22:44:04 2025 (r1927498)
@@ -22,24 +22,22 @@
#include <apr_pools.h>
#include <apr_hash.h>
+#define APR_WANT_STRFUNC
+#include <apr_want.h>
+
#include "serf.h"
#include "serf_private.h"
-/* Character classes */
-#define CT_ASCII 0x0001
-#define CT_CNTRL 0x0002
-#define CT_SPACE 0x0004
-#define CT_PUNCT 0x0008
-#define CT_DIGIT 0x0010
-#define CT_UPPER 0x0020
-#define CT_LOWER 0x0040
-#define CT_XALPHA 0x0080
-#define CT_UTF8_CONT 0x0100
-#define CT_UTF8_LEAD 0x0200
+/*******************************************************************/
+/* Character class lookup */
-/* Only space and the horizontal tab are treated as whitespace. */
-#define CT_WHITESPACE 0x010000
+static APR_INLINE int ct_isalnum(char c)
+{
+ return ((c >= '0' && c <= '9')
+ || (c >= 'A' && c <= 'Z')
+ || (c >= 'a' && c <= 'z'));
+}
/* A `token` is a sequence of ASCII digits, lowercase and uppercase letters,
and the following punctiation marks:
@@ -48,7 +46,11 @@
See: https://www.rfc-editor.org/rfc/rfc9110.html#section-5.6.2
*/
-#define CT_TOKEN 0x020000
+static APR_INLINE int ct_istoken(char c)
+{
+ static const char punct[] = "!#$%&'*+-.^_`|~";
+ return c && (ct_isalnum(c) || strchr(punct, c));
+}
/* A `token68` is a sequence of ASCII digits, lowercase and uppercase letters,
and the following punctiation marks:
@@ -58,283 +60,15 @@
followed by zero or more `=` signs.
See: https://www.rfc-editor.org/rfc/rfc9110.html#section-11.2
*/
-#define CT_TOKEN68 0x040000
-
-
-/* ASCII + UTF-8 character table, stolen wholesale from Subversion. */
-static const apr_uint32_t char_table[256] =
-{
- /* **** DO NOT EDIT! ****
- This table was generated by genctype.py, make changes there. */
- /* nul */ CT_ASCII | CT_CNTRL,
- /* soh */ CT_ASCII | CT_CNTRL,
- /* stx */ CT_ASCII | CT_CNTRL,
- /* etx */ CT_ASCII | CT_CNTRL,
- /* eot */ CT_ASCII | CT_CNTRL,
- /* enq */ CT_ASCII | CT_CNTRL,
- /* ack */ CT_ASCII | CT_CNTRL,
- /* bel */ CT_ASCII | CT_CNTRL,
- /* bs */ CT_ASCII | CT_CNTRL,
- /* ht */ CT_ASCII | CT_CNTRL | CT_SPACE | CT_WHITESPACE,
- /* nl */ CT_ASCII | CT_CNTRL | CT_SPACE,
- /* vt */ CT_ASCII | CT_CNTRL | CT_SPACE,
- /* np */ CT_ASCII | CT_CNTRL | CT_SPACE,
- /* cr */ CT_ASCII | CT_CNTRL | CT_SPACE,
- /* so */ CT_ASCII | CT_CNTRL,
- /* si */ CT_ASCII | CT_CNTRL,
- /* dle */ CT_ASCII | CT_CNTRL,
- /* dc1 */ CT_ASCII | CT_CNTRL,
- /* dc2 */ CT_ASCII | CT_CNTRL,
- /* dc3 */ CT_ASCII | CT_CNTRL,
- /* dc4 */ CT_ASCII | CT_CNTRL,
- /* nak */ CT_ASCII | CT_CNTRL,
- /* syn */ CT_ASCII | CT_CNTRL,
- /* etb */ CT_ASCII | CT_CNTRL,
- /* can */ CT_ASCII | CT_CNTRL,
- /* em */ CT_ASCII | CT_CNTRL,
- /* sub */ CT_ASCII | CT_CNTRL,
- /* esc */ CT_ASCII | CT_CNTRL,
- /* fs */ CT_ASCII | CT_CNTRL,
- /* gs */ CT_ASCII | CT_CNTRL,
- /* rs */ CT_ASCII | CT_CNTRL,
- /* us */ CT_ASCII | CT_CNTRL,
- /* sp */ CT_ASCII | CT_SPACE | CT_WHITESPACE,
- /* ! */ CT_ASCII | CT_PUNCT | CT_TOKEN,
- /* " */ CT_ASCII | CT_PUNCT,
- /* # */ CT_ASCII | CT_PUNCT | CT_TOKEN,
- /* $ */ CT_ASCII | CT_PUNCT | CT_TOKEN,
- /* % */ CT_ASCII | CT_PUNCT | CT_TOKEN,
- /* & */ CT_ASCII | CT_PUNCT | CT_TOKEN,
- /* ' */ CT_ASCII | CT_PUNCT | CT_TOKEN,
- /* ( */ CT_ASCII | CT_PUNCT,
- /* ) */ CT_ASCII | CT_PUNCT,
- /* * */ CT_ASCII | CT_PUNCT | CT_TOKEN,
- /* + */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68,
- /* , */ CT_ASCII | CT_PUNCT,
- /* - */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68,
- /* . */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68,
- /* / */ CT_ASCII | CT_PUNCT | CT_TOKEN68,
- /* 0 */ CT_ASCII | CT_DIGIT,
- /* 1 */ CT_ASCII | CT_DIGIT,
- /* 2 */ CT_ASCII | CT_DIGIT,
- /* 3 */ CT_ASCII | CT_DIGIT,
- /* 4 */ CT_ASCII | CT_DIGIT,
- /* 5 */ CT_ASCII | CT_DIGIT,
- /* 6 */ CT_ASCII | CT_DIGIT,
- /* 7 */ CT_ASCII | CT_DIGIT,
- /* 8 */ CT_ASCII | CT_DIGIT,
- /* 9 */ CT_ASCII | CT_DIGIT,
- /* : */ CT_ASCII | CT_PUNCT,
- /* ; */ CT_ASCII | CT_PUNCT,
- /* < */ CT_ASCII | CT_PUNCT,
- /* = */ CT_ASCII | CT_PUNCT,
- /* > */ CT_ASCII | CT_PUNCT,
- /* ? */ CT_ASCII | CT_PUNCT,
- /* @ */ CT_ASCII | CT_PUNCT,
- /* A */ CT_ASCII | CT_UPPER | CT_XALPHA,
- /* B */ CT_ASCII | CT_UPPER | CT_XALPHA,
- /* C */ CT_ASCII | CT_UPPER | CT_XALPHA,
- /* D */ CT_ASCII | CT_UPPER | CT_XALPHA,
- /* E */ CT_ASCII | CT_UPPER | CT_XALPHA,
- /* F */ CT_ASCII | CT_UPPER | CT_XALPHA,
- /* G */ CT_ASCII | CT_UPPER,
- /* H */ CT_ASCII | CT_UPPER,
- /* I */ CT_ASCII | CT_UPPER,
- /* J */ CT_ASCII | CT_UPPER,
- /* K */ CT_ASCII | CT_UPPER,
- /* L */ CT_ASCII | CT_UPPER,
- /* M */ CT_ASCII | CT_UPPER,
- /* N */ CT_ASCII | CT_UPPER,
- /* O */ CT_ASCII | CT_UPPER,
- /* P */ CT_ASCII | CT_UPPER,
- /* Q */ CT_ASCII | CT_UPPER,
- /* R */ CT_ASCII | CT_UPPER,
- /* S */ CT_ASCII | CT_UPPER,
- /* T */ CT_ASCII | CT_UPPER,
- /* U */ CT_ASCII | CT_UPPER,
- /* V */ CT_ASCII | CT_UPPER,
- /* W */ CT_ASCII | CT_UPPER,
- /* X */ CT_ASCII | CT_UPPER,
- /* Y */ CT_ASCII | CT_UPPER,
- /* Z */ CT_ASCII | CT_UPPER,
- /* [ */ CT_ASCII | CT_PUNCT,
- /* \ */ CT_ASCII | CT_PUNCT,
- /* ] */ CT_ASCII | CT_PUNCT,
- /* ^ */ CT_ASCII | CT_PUNCT | CT_TOKEN,
- /* _ */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68,
- /* ` */ CT_ASCII | CT_PUNCT | CT_TOKEN,
- /* a */ CT_ASCII | CT_LOWER | CT_XALPHA,
- /* b */ CT_ASCII | CT_LOWER | CT_XALPHA,
- /* c */ CT_ASCII | CT_LOWER | CT_XALPHA,
- /* d */ CT_ASCII | CT_LOWER | CT_XALPHA,
- /* e */ CT_ASCII | CT_LOWER | CT_XALPHA,
- /* f */ CT_ASCII | CT_LOWER | CT_XALPHA,
- /* g */ CT_ASCII | CT_LOWER,
- /* h */ CT_ASCII | CT_LOWER,
- /* i */ CT_ASCII | CT_LOWER,
- /* j */ CT_ASCII | CT_LOWER,
- /* k */ CT_ASCII | CT_LOWER,
- /* l */ CT_ASCII | CT_LOWER,
- /* m */ CT_ASCII | CT_LOWER,
- /* n */ CT_ASCII | CT_LOWER,
- /* o */ CT_ASCII | CT_LOWER,
- /* p */ CT_ASCII | CT_LOWER,
- /* q */ CT_ASCII | CT_LOWER,
- /* r */ CT_ASCII | CT_LOWER,
- /* s */ CT_ASCII | CT_LOWER,
- /* t */ CT_ASCII | CT_LOWER,
- /* u */ CT_ASCII | CT_LOWER,
- /* v */ CT_ASCII | CT_LOWER,
- /* w */ CT_ASCII | CT_LOWER,
- /* x */ CT_ASCII | CT_LOWER,
- /* y */ CT_ASCII | CT_LOWER,
- /* z */ CT_ASCII | CT_LOWER,
- /* { */ CT_ASCII | CT_PUNCT,
- /* | */ CT_ASCII | CT_PUNCT | CT_TOKEN,
- /* } */ CT_ASCII | CT_PUNCT,
- /* ~ */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68,
- /* del */ CT_ASCII | CT_CNTRL,
- /* x80 */ CT_UTF8_CONT,
- /* x81 */ CT_UTF8_CONT,
- /* x82 */ CT_UTF8_CONT,
- /* x83 */ CT_UTF8_CONT,
- /* x84 */ CT_UTF8_CONT,
- /* x85 */ CT_UTF8_CONT,
- /* x86 */ CT_UTF8_CONT,
- /* x87 */ CT_UTF8_CONT,
- /* x88 */ CT_UTF8_CONT,
- /* x89 */ CT_UTF8_CONT,
- /* x8a */ CT_UTF8_CONT,
- /* x8b */ CT_UTF8_CONT,
- /* x8c */ CT_UTF8_CONT,
- /* x8d */ CT_UTF8_CONT,
- /* x8e */ CT_UTF8_CONT,
- /* x8f */ CT_UTF8_CONT,
- /* x90 */ CT_UTF8_CONT,
- /* x91 */ CT_UTF8_CONT,
- /* x92 */ CT_UTF8_CONT,
- /* x93 */ CT_UTF8_CONT,
- /* x94 */ CT_UTF8_CONT,
- /* x95 */ CT_UTF8_CONT,
- /* x96 */ CT_UTF8_CONT,
- /* x97 */ CT_UTF8_CONT,
- /* x98 */ CT_UTF8_CONT,
- /* x99 */ CT_UTF8_CONT,
- /* x9a */ CT_UTF8_CONT,
- /* x9b */ CT_UTF8_CONT,
- /* x9c */ CT_UTF8_CONT,
- /* x9d */ CT_UTF8_CONT,
- /* x9e */ CT_UTF8_CONT,
- /* x9f */ CT_UTF8_CONT,
- /* xa0 */ CT_UTF8_CONT,
- /* xa1 */ CT_UTF8_CONT,
- /* xa2 */ CT_UTF8_CONT,
- /* xa3 */ CT_UTF8_CONT,
- /* xa4 */ CT_UTF8_CONT,
- /* xa5 */ CT_UTF8_CONT,
- /* xa6 */ CT_UTF8_CONT,
- /* xa7 */ CT_UTF8_CONT,
- /* xa8 */ CT_UTF8_CONT,
- /* xa9 */ CT_UTF8_CONT,
- /* xaa */ CT_UTF8_CONT,
- /* xab */ CT_UTF8_CONT,
- /* xac */ CT_UTF8_CONT,
- /* xad */ CT_UTF8_CONT,
- /* xae */ CT_UTF8_CONT,
- /* xaf */ CT_UTF8_CONT,
- /* xb0 */ CT_UTF8_CONT,
- /* xb1 */ CT_UTF8_CONT,
- /* xb2 */ CT_UTF8_CONT,
- /* xb3 */ CT_UTF8_CONT,
- /* xb4 */ CT_UTF8_CONT,
- /* xb5 */ CT_UTF8_CONT,
- /* xb6 */ CT_UTF8_CONT,
- /* xb7 */ CT_UTF8_CONT,
- /* xb8 */ CT_UTF8_CONT,
- /* xb9 */ CT_UTF8_CONT,
- /* xba */ CT_UTF8_CONT,
- /* xbb */ CT_UTF8_CONT,
- /* xbc */ CT_UTF8_CONT,
- /* xbd */ CT_UTF8_CONT,
- /* xc5 */ CT_UTF8_LEAD,
- /* xc6 */ CT_UTF8_LEAD,
- /* xc7 */ CT_UTF8_LEAD,
- /* xc8 */ CT_UTF8_LEAD,
- /* xc9 */ CT_UTF8_LEAD,
- /* xca */ CT_UTF8_LEAD,
- /* xcb */ CT_UTF8_LEAD,
- /* xcc */ CT_UTF8_LEAD,
- /* xcd */ CT_UTF8_LEAD,
- /* xce */ CT_UTF8_LEAD,
- /* xcf */ CT_UTF8_LEAD,
- /* xd0 */ CT_UTF8_LEAD,
- /* xd1 */ CT_UTF8_LEAD,
- /* xd2 */ CT_UTF8_LEAD,
- /* xd3 */ CT_UTF8_LEAD,
- /* xd4 */ CT_UTF8_LEAD,
- /* xd5 */ CT_UTF8_LEAD,
- /* xd6 */ CT_UTF8_LEAD,
- /* xd7 */ CT_UTF8_LEAD,
- /* xd8 */ CT_UTF8_LEAD,
- /* xd9 */ CT_UTF8_LEAD,
- /* xda */ CT_UTF8_LEAD,
- /* xdb */ CT_UTF8_LEAD,
- /* xdc */ CT_UTF8_LEAD,
- /* xdd */ CT_UTF8_LEAD,
- /* xde */ CT_UTF8_LEAD,
- /* xdf */ CT_UTF8_LEAD,
- /* xe0 */ 0,
- /* xe1 */ CT_UTF8_LEAD,
- /* xe2 */ CT_UTF8_LEAD,
- /* xe3 */ CT_UTF8_LEAD,
- /* xe4 */ CT_UTF8_LEAD,
- /* xe5 */ CT_UTF8_LEAD,
- /* xe6 */ CT_UTF8_LEAD,
- /* xe7 */ CT_UTF8_LEAD,
- /* xe8 */ CT_UTF8_LEAD,
- /* xe9 */ CT_UTF8_LEAD,
- /* xea */ CT_UTF8_LEAD,
- /* xeb */ CT_UTF8_LEAD,
- /* xec */ CT_UTF8_LEAD,
- /* xed */ CT_UTF8_LEAD,
- /* xee */ CT_UTF8_LEAD,
- /* xef */ CT_UTF8_LEAD,
- /* xf0 */ 0,
- /* xf1 */ CT_UTF8_LEAD,
- /* xf2 */ CT_UTF8_LEAD,
- /* xf3 */ CT_UTF8_LEAD,
- /* xf4 */ CT_UTF8_LEAD,
- /* xf5 */ CT_UTF8_LEAD,
- /* xf6 */ CT_UTF8_LEAD,
- /* xf7 */ CT_UTF8_LEAD,
- /* xf8 */ 0,
- /* xf9 */ CT_UTF8_LEAD,
- /* xfa */ CT_UTF8_LEAD,
- /* xfb */ CT_UTF8_LEAD,
- /* xfc */ 0,
- /* xfd */ CT_UTF8_LEAD,
- /* xfe */ 0,
- /* xff */ 0
-};
-
-/* Character class lookup */
-static APR_INLINE int ct_isspace(char c)
-{
- return char_table[0xff & (unsigned char)c] & CT_WHITESPACE;
-}
-
-static APR_INLINE int ct_istoken(char c)
-{
- static const apr_uint32_t ct = CT_TOKEN | CT_DIGIT | CT_UPPER | CT_LOWER;
- return ct & char_table[0xff & (unsigned char)c];
-}
-
static APR_INLINE int ct_istoken68(char c)
{
- static const apr_uint32_t ct = CT_TOKEN68 | CT_DIGIT | CT_UPPER | CT_LOWER;
- return ct & char_table[0xff & (unsigned char)c];
+ static const char punct[] = "-._~+/";
+ return c && (ct_isalnum(c) || strchr(punct, c));
}
+/*******************************************************************/
+/* Syntactic elements. */
+
/* Fold ASCII to lowercase. */
static APR_INLINE char ct_tolower(char c)
{
@@ -346,14 +80,14 @@ static APR_INLINE char ct_tolower(char c
return c;
}
-
+/* Skip spaces. */
static const char *skip_space(const char *src)
{
- while (ct_isspace(*src))
- ++src;
- return src;
+ /* In HTTP land, only tab and space count as whitespace. */
+ return src + strspn(src, " \t");
}
+/* Skip token68 */
static const char *skip_token68(const char *src)
{
while (ct_istoken68(*src))
@@ -428,6 +162,8 @@ static const char *copy_token(char **dst
return src;
}
+/*******************************************************************/
+/* Internal API */
apr_hash_t *serf__parse_authn_parameters(const char *attrs, apr_pool_t *pool)
{