Author: brane Date: Sun Jul 27 22:44:04 2025 New Revision: 1927498 Log: On the user-defined-authn branch: Remove the character classification table. It turns out that range checks are faster than table lookups. Which I should have known.
* src/genctype.py: Removed. * src/syntax.c: - Declare string functions through <apr_want.h> - Remove the character classification table and macros. (ct_isalnum): New; finds ASCII digits, uppercase and lowercase letters. (ct_istoken, ct_istoken68): Reimplement with range checks. (skip_space): Likewise; use strspn instead. Deleted: serf/branches/user-defined-authn/src/genctype.py Modified: serf/branches/user-defined-authn/src/syntax.c Modified: serf/branches/user-defined-authn/src/syntax.c ============================================================================== --- serf/branches/user-defined-authn/src/syntax.c Sun Jul 27 20:10:02 2025 (r1927497) +++ serf/branches/user-defined-authn/src/syntax.c Sun Jul 27 22:44:04 2025 (r1927498) @@ -22,24 +22,22 @@ #include <apr_pools.h> #include <apr_hash.h> +#define APR_WANT_STRFUNC +#include <apr_want.h> + #include "serf.h" #include "serf_private.h" -/* Character classes */ -#define CT_ASCII 0x0001 -#define CT_CNTRL 0x0002 -#define CT_SPACE 0x0004 -#define CT_PUNCT 0x0008 -#define CT_DIGIT 0x0010 -#define CT_UPPER 0x0020 -#define CT_LOWER 0x0040 -#define CT_XALPHA 0x0080 -#define CT_UTF8_CONT 0x0100 -#define CT_UTF8_LEAD 0x0200 +/*******************************************************************/ +/* Character class lookup */ -/* Only space and the horizontal tab are treated as whitespace. 
*/ -#define CT_WHITESPACE 0x010000 +static APR_INLINE int ct_isalnum(char c) +{ + return ((c >= '0' && c <= '9') + || (c >= 'A' && c <= 'Z') + || (c >= 'a' && c <= 'z')); +} /* A `token` is a sequence of ASCII digits, lowercase and uppercase letters, and the following punctiation marks: @@ -48,7 +46,11 @@ See: https://www.rfc-editor.org/rfc/rfc9110.html#section-5.6.2 */ -#define CT_TOKEN 0x020000 +static APR_INLINE int ct_istoken(char c) +{ + static const char punct[] = "!#$%&'*+-.^_`|~"; + return c && (ct_isalnum(c) || strchr(punct, c)); +} /* A `token68` is a sequence of ASCII digits, lowercase and uppercase letters, and the following punctiation marks: @@ -58,283 +60,15 @@ followed by zero or more `=` signs. See: https://www.rfc-editor.org/rfc/rfc9110.html#section-11.2 */ -#define CT_TOKEN68 0x040000 - - -/* ASCII + UTF-8 character table, stolen wholesale from Subversion. */ -static const apr_uint32_t char_table[256] = -{ - /* **** DO NOT EDIT! **** - This table was generated by genctype.py, make changes there. 
*/ - /* nul */ CT_ASCII | CT_CNTRL, - /* soh */ CT_ASCII | CT_CNTRL, - /* stx */ CT_ASCII | CT_CNTRL, - /* etx */ CT_ASCII | CT_CNTRL, - /* eot */ CT_ASCII | CT_CNTRL, - /* enq */ CT_ASCII | CT_CNTRL, - /* ack */ CT_ASCII | CT_CNTRL, - /* bel */ CT_ASCII | CT_CNTRL, - /* bs */ CT_ASCII | CT_CNTRL, - /* ht */ CT_ASCII | CT_CNTRL | CT_SPACE | CT_WHITESPACE, - /* nl */ CT_ASCII | CT_CNTRL | CT_SPACE, - /* vt */ CT_ASCII | CT_CNTRL | CT_SPACE, - /* np */ CT_ASCII | CT_CNTRL | CT_SPACE, - /* cr */ CT_ASCII | CT_CNTRL | CT_SPACE, - /* so */ CT_ASCII | CT_CNTRL, - /* si */ CT_ASCII | CT_CNTRL, - /* dle */ CT_ASCII | CT_CNTRL, - /* dc1 */ CT_ASCII | CT_CNTRL, - /* dc2 */ CT_ASCII | CT_CNTRL, - /* dc3 */ CT_ASCII | CT_CNTRL, - /* dc4 */ CT_ASCII | CT_CNTRL, - /* nak */ CT_ASCII | CT_CNTRL, - /* syn */ CT_ASCII | CT_CNTRL, - /* etb */ CT_ASCII | CT_CNTRL, - /* can */ CT_ASCII | CT_CNTRL, - /* em */ CT_ASCII | CT_CNTRL, - /* sub */ CT_ASCII | CT_CNTRL, - /* esc */ CT_ASCII | CT_CNTRL, - /* fs */ CT_ASCII | CT_CNTRL, - /* gs */ CT_ASCII | CT_CNTRL, - /* rs */ CT_ASCII | CT_CNTRL, - /* us */ CT_ASCII | CT_CNTRL, - /* sp */ CT_ASCII | CT_SPACE | CT_WHITESPACE, - /* ! */ CT_ASCII | CT_PUNCT | CT_TOKEN, - /* " */ CT_ASCII | CT_PUNCT, - /* # */ CT_ASCII | CT_PUNCT | CT_TOKEN, - /* $ */ CT_ASCII | CT_PUNCT | CT_TOKEN, - /* % */ CT_ASCII | CT_PUNCT | CT_TOKEN, - /* & */ CT_ASCII | CT_PUNCT | CT_TOKEN, - /* ' */ CT_ASCII | CT_PUNCT | CT_TOKEN, - /* ( */ CT_ASCII | CT_PUNCT, - /* ) */ CT_ASCII | CT_PUNCT, - /* * */ CT_ASCII | CT_PUNCT | CT_TOKEN, - /* + */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68, - /* , */ CT_ASCII | CT_PUNCT, - /* - */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68, - /* . 
*/ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68, - /* / */ CT_ASCII | CT_PUNCT | CT_TOKEN68, - /* 0 */ CT_ASCII | CT_DIGIT, - /* 1 */ CT_ASCII | CT_DIGIT, - /* 2 */ CT_ASCII | CT_DIGIT, - /* 3 */ CT_ASCII | CT_DIGIT, - /* 4 */ CT_ASCII | CT_DIGIT, - /* 5 */ CT_ASCII | CT_DIGIT, - /* 6 */ CT_ASCII | CT_DIGIT, - /* 7 */ CT_ASCII | CT_DIGIT, - /* 8 */ CT_ASCII | CT_DIGIT, - /* 9 */ CT_ASCII | CT_DIGIT, - /* : */ CT_ASCII | CT_PUNCT, - /* ; */ CT_ASCII | CT_PUNCT, - /* < */ CT_ASCII | CT_PUNCT, - /* = */ CT_ASCII | CT_PUNCT, - /* > */ CT_ASCII | CT_PUNCT, - /* ? */ CT_ASCII | CT_PUNCT, - /* @ */ CT_ASCII | CT_PUNCT, - /* A */ CT_ASCII | CT_UPPER | CT_XALPHA, - /* B */ CT_ASCII | CT_UPPER | CT_XALPHA, - /* C */ CT_ASCII | CT_UPPER | CT_XALPHA, - /* D */ CT_ASCII | CT_UPPER | CT_XALPHA, - /* E */ CT_ASCII | CT_UPPER | CT_XALPHA, - /* F */ CT_ASCII | CT_UPPER | CT_XALPHA, - /* G */ CT_ASCII | CT_UPPER, - /* H */ CT_ASCII | CT_UPPER, - /* I */ CT_ASCII | CT_UPPER, - /* J */ CT_ASCII | CT_UPPER, - /* K */ CT_ASCII | CT_UPPER, - /* L */ CT_ASCII | CT_UPPER, - /* M */ CT_ASCII | CT_UPPER, - /* N */ CT_ASCII | CT_UPPER, - /* O */ CT_ASCII | CT_UPPER, - /* P */ CT_ASCII | CT_UPPER, - /* Q */ CT_ASCII | CT_UPPER, - /* R */ CT_ASCII | CT_UPPER, - /* S */ CT_ASCII | CT_UPPER, - /* T */ CT_ASCII | CT_UPPER, - /* U */ CT_ASCII | CT_UPPER, - /* V */ CT_ASCII | CT_UPPER, - /* W */ CT_ASCII | CT_UPPER, - /* X */ CT_ASCII | CT_UPPER, - /* Y */ CT_ASCII | CT_UPPER, - /* Z */ CT_ASCII | CT_UPPER, - /* [ */ CT_ASCII | CT_PUNCT, - /* \ */ CT_ASCII | CT_PUNCT, - /* ] */ CT_ASCII | CT_PUNCT, - /* ^ */ CT_ASCII | CT_PUNCT | CT_TOKEN, - /* _ */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68, - /* ` */ CT_ASCII | CT_PUNCT | CT_TOKEN, - /* a */ CT_ASCII | CT_LOWER | CT_XALPHA, - /* b */ CT_ASCII | CT_LOWER | CT_XALPHA, - /* c */ CT_ASCII | CT_LOWER | CT_XALPHA, - /* d */ CT_ASCII | CT_LOWER | CT_XALPHA, - /* e */ CT_ASCII | CT_LOWER | CT_XALPHA, - /* f */ CT_ASCII | CT_LOWER | CT_XALPHA, - /* g */ 
CT_ASCII | CT_LOWER, - /* h */ CT_ASCII | CT_LOWER, - /* i */ CT_ASCII | CT_LOWER, - /* j */ CT_ASCII | CT_LOWER, - /* k */ CT_ASCII | CT_LOWER, - /* l */ CT_ASCII | CT_LOWER, - /* m */ CT_ASCII | CT_LOWER, - /* n */ CT_ASCII | CT_LOWER, - /* o */ CT_ASCII | CT_LOWER, - /* p */ CT_ASCII | CT_LOWER, - /* q */ CT_ASCII | CT_LOWER, - /* r */ CT_ASCII | CT_LOWER, - /* s */ CT_ASCII | CT_LOWER, - /* t */ CT_ASCII | CT_LOWER, - /* u */ CT_ASCII | CT_LOWER, - /* v */ CT_ASCII | CT_LOWER, - /* w */ CT_ASCII | CT_LOWER, - /* x */ CT_ASCII | CT_LOWER, - /* y */ CT_ASCII | CT_LOWER, - /* z */ CT_ASCII | CT_LOWER, - /* { */ CT_ASCII | CT_PUNCT, - /* | */ CT_ASCII | CT_PUNCT | CT_TOKEN, - /* } */ CT_ASCII | CT_PUNCT, - /* ~ */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68, - /* del */ CT_ASCII | CT_CNTRL, - /* x80 */ CT_UTF8_CONT, - /* x81 */ CT_UTF8_CONT, - /* x82 */ CT_UTF8_CONT, - /* x83 */ CT_UTF8_CONT, - /* x84 */ CT_UTF8_CONT, - /* x85 */ CT_UTF8_CONT, - /* x86 */ CT_UTF8_CONT, - /* x87 */ CT_UTF8_CONT, - /* x88 */ CT_UTF8_CONT, - /* x89 */ CT_UTF8_CONT, - /* x8a */ CT_UTF8_CONT, - /* x8b */ CT_UTF8_CONT, - /* x8c */ CT_UTF8_CONT, - /* x8d */ CT_UTF8_CONT, - /* x8e */ CT_UTF8_CONT, - /* x8f */ CT_UTF8_CONT, - /* x90 */ CT_UTF8_CONT, - /* x91 */ CT_UTF8_CONT, - /* x92 */ CT_UTF8_CONT, - /* x93 */ CT_UTF8_CONT, - /* x94 */ CT_UTF8_CONT, - /* x95 */ CT_UTF8_CONT, - /* x96 */ CT_UTF8_CONT, - /* x97 */ CT_UTF8_CONT, - /* x98 */ CT_UTF8_CONT, - /* x99 */ CT_UTF8_CONT, - /* x9a */ CT_UTF8_CONT, - /* x9b */ CT_UTF8_CONT, - /* x9c */ CT_UTF8_CONT, - /* x9d */ CT_UTF8_CONT, - /* x9e */ CT_UTF8_CONT, - /* x9f */ CT_UTF8_CONT, - /* xa0 */ CT_UTF8_CONT, - /* xa1 */ CT_UTF8_CONT, - /* xa2 */ CT_UTF8_CONT, - /* xa3 */ CT_UTF8_CONT, - /* xa4 */ CT_UTF8_CONT, - /* xa5 */ CT_UTF8_CONT, - /* xa6 */ CT_UTF8_CONT, - /* xa7 */ CT_UTF8_CONT, - /* xa8 */ CT_UTF8_CONT, - /* xa9 */ CT_UTF8_CONT, - /* xaa */ CT_UTF8_CONT, - /* xab */ CT_UTF8_CONT, - /* xac */ CT_UTF8_CONT, - /* xad */ CT_UTF8_CONT, - 
/* xae */ CT_UTF8_CONT, - /* xaf */ CT_UTF8_CONT, - /* xb0 */ CT_UTF8_CONT, - /* xb1 */ CT_UTF8_CONT, - /* xb2 */ CT_UTF8_CONT, - /* xb3 */ CT_UTF8_CONT, - /* xb4 */ CT_UTF8_CONT, - /* xb5 */ CT_UTF8_CONT, - /* xb6 */ CT_UTF8_CONT, - /* xb7 */ CT_UTF8_CONT, - /* xb8 */ CT_UTF8_CONT, - /* xb9 */ CT_UTF8_CONT, - /* xba */ CT_UTF8_CONT, - /* xbb */ CT_UTF8_CONT, - /* xbc */ CT_UTF8_CONT, - /* xbd */ CT_UTF8_CONT, - /* xc5 */ CT_UTF8_LEAD, - /* xc6 */ CT_UTF8_LEAD, - /* xc7 */ CT_UTF8_LEAD, - /* xc8 */ CT_UTF8_LEAD, - /* xc9 */ CT_UTF8_LEAD, - /* xca */ CT_UTF8_LEAD, - /* xcb */ CT_UTF8_LEAD, - /* xcc */ CT_UTF8_LEAD, - /* xcd */ CT_UTF8_LEAD, - /* xce */ CT_UTF8_LEAD, - /* xcf */ CT_UTF8_LEAD, - /* xd0 */ CT_UTF8_LEAD, - /* xd1 */ CT_UTF8_LEAD, - /* xd2 */ CT_UTF8_LEAD, - /* xd3 */ CT_UTF8_LEAD, - /* xd4 */ CT_UTF8_LEAD, - /* xd5 */ CT_UTF8_LEAD, - /* xd6 */ CT_UTF8_LEAD, - /* xd7 */ CT_UTF8_LEAD, - /* xd8 */ CT_UTF8_LEAD, - /* xd9 */ CT_UTF8_LEAD, - /* xda */ CT_UTF8_LEAD, - /* xdb */ CT_UTF8_LEAD, - /* xdc */ CT_UTF8_LEAD, - /* xdd */ CT_UTF8_LEAD, - /* xde */ CT_UTF8_LEAD, - /* xdf */ CT_UTF8_LEAD, - /* xe0 */ 0, - /* xe1 */ CT_UTF8_LEAD, - /* xe2 */ CT_UTF8_LEAD, - /* xe3 */ CT_UTF8_LEAD, - /* xe4 */ CT_UTF8_LEAD, - /* xe5 */ CT_UTF8_LEAD, - /* xe6 */ CT_UTF8_LEAD, - /* xe7 */ CT_UTF8_LEAD, - /* xe8 */ CT_UTF8_LEAD, - /* xe9 */ CT_UTF8_LEAD, - /* xea */ CT_UTF8_LEAD, - /* xeb */ CT_UTF8_LEAD, - /* xec */ CT_UTF8_LEAD, - /* xed */ CT_UTF8_LEAD, - /* xee */ CT_UTF8_LEAD, - /* xef */ CT_UTF8_LEAD, - /* xf0 */ 0, - /* xf1 */ CT_UTF8_LEAD, - /* xf2 */ CT_UTF8_LEAD, - /* xf3 */ CT_UTF8_LEAD, - /* xf4 */ CT_UTF8_LEAD, - /* xf5 */ CT_UTF8_LEAD, - /* xf6 */ CT_UTF8_LEAD, - /* xf7 */ CT_UTF8_LEAD, - /* xf8 */ 0, - /* xf9 */ CT_UTF8_LEAD, - /* xfa */ CT_UTF8_LEAD, - /* xfb */ CT_UTF8_LEAD, - /* xfc */ 0, - /* xfd */ CT_UTF8_LEAD, - /* xfe */ 0, - /* xff */ 0 -}; - -/* Character class lookup */ -static APR_INLINE int ct_isspace(char c) -{ - return char_table[0xff & (unsigned 
char)c] & CT_WHITESPACE; -} - -static APR_INLINE int ct_istoken(char c) -{ - static const apr_uint32_t ct = CT_TOKEN | CT_DIGIT | CT_UPPER | CT_LOWER; - return ct & char_table[0xff & (unsigned char)c]; -} - static APR_INLINE int ct_istoken68(char c) { - static const apr_uint32_t ct = CT_TOKEN68 | CT_DIGIT | CT_UPPER | CT_LOWER; - return ct & char_table[0xff & (unsigned char)c]; + static const char punct[] = "-._~+/"; + return c && (ct_isalnum(c) || strchr(punct, c)); } +/*******************************************************************/ +/* Syntactic elements. */ + /* Fold ASCII to lowercase. */ static APR_INLINE char ct_tolower(char c) { @@ -346,14 +80,14 @@ static APR_INLINE char ct_tolower(char c return c; } - +/* Skip spaces. */ static const char *skip_space(const char *src) { - while (ct_isspace(*src)) - ++src; - return src; + /* In HTTP land, only tab and space count as whitespace. */ + return src + strspn(src, " \t"); } +/* Skip token68 */ static const char *skip_token68(const char *src) { while (ct_istoken68(*src)) @@ -428,6 +162,8 @@ static const char *copy_token(char **dst return src; } +/*******************************************************************/ +/* Internal API */ apr_hash_t *serf__parse_authn_parameters(const char *attrs, apr_pool_t *pool) {