Author: brane
Date: Sun Jul 27 22:44:04 2025
New Revision: 1927498

Log:
On the user-defined-authn branch: Remove the character classification
table. It turns out that range checks are faster than table lookups.
Which I should have known.

* src/genctype.py: Removed.
* src/syntax.c:
   - Declare string functions through <apr_want.h>
   - Remove the character classification table and macros.
  (ct_isalnum): New; finds ASCII digits, uppercase and lowercase letters.
  (ct_istoken, ct_istoken68): Reimplement with range checks.
  (skip_space): Likewise; use strspn instead.

Deleted:
   serf/branches/user-defined-authn/src/genctype.py
Modified:
   serf/branches/user-defined-authn/src/syntax.c

Modified: serf/branches/user-defined-authn/src/syntax.c
==============================================================================
--- serf/branches/user-defined-authn/src/syntax.c       Sun Jul 27 20:10:02 2025        (r1927497)
+++ serf/branches/user-defined-authn/src/syntax.c       Sun Jul 27 22:44:04 2025        (r1927498)
@@ -22,24 +22,22 @@
 #include <apr_pools.h>
 #include <apr_hash.h>
 
+#define APR_WANT_STRFUNC
+#include <apr_want.h>
+
 #include "serf.h"
 #include "serf_private.h"
 
 
-/* Character classes */
-#define CT_ASCII        0x0001
-#define CT_CNTRL        0x0002
-#define CT_SPACE        0x0004
-#define CT_PUNCT        0x0008
-#define CT_DIGIT        0x0010
-#define CT_UPPER        0x0020
-#define CT_LOWER        0x0040
-#define CT_XALPHA       0x0080
-#define CT_UTF8_CONT    0x0100
-#define CT_UTF8_LEAD    0x0200
+/*******************************************************************/
+/* Character class lookup */
 
-/* Only space and the horizontal tab are treated as whitespace. */
-#define CT_WHITESPACE 0x010000
+static APR_INLINE int ct_isalnum(char c)
+{
+    return ((c >= '0' && c <= '9')
+            || (c >= 'A' && c <= 'Z')
+            || (c >= 'a' && c <= 'z'));
+}
 
 /* A `token` is a sequence of ASCII digits, lowercase and uppercase letters,
    and the following punctiation marks:
@@ -48,7 +46,11 @@
 
    See: https://www.rfc-editor.org/rfc/rfc9110.html#section-5.6.2
 */
-#define CT_TOKEN      0x020000
+static APR_INLINE int ct_istoken(char c)
+{
+    static const char punct[] = "!#$%&'*+-.^_`|~";
+    return c && (ct_isalnum(c) || strchr(punct, c));
+}
 
 /* A `token68` is a sequence of ASCII digits, lowercase and uppercase letters,
    and the following punctiation marks:
@@ -58,283 +60,15 @@
    followed by zero or more `=` signs.
    See: https://www.rfc-editor.org/rfc/rfc9110.html#section-11.2
 */
-#define CT_TOKEN68    0x040000
-
-
-/* ASCII + UTF-8 character table, stolen wholesale from Subversion. */
-static const apr_uint32_t char_table[256] =
-{
-    /* **** DO NOT EDIT! ****
-       This table was generated by genctype.py, make changes there. */
-    /* nul */ CT_ASCII | CT_CNTRL,
-    /* soh */ CT_ASCII | CT_CNTRL,
-    /* stx */ CT_ASCII | CT_CNTRL,
-    /* etx */ CT_ASCII | CT_CNTRL,
-    /* eot */ CT_ASCII | CT_CNTRL,
-    /* enq */ CT_ASCII | CT_CNTRL,
-    /* ack */ CT_ASCII | CT_CNTRL,
-    /* bel */ CT_ASCII | CT_CNTRL,
-    /* bs  */ CT_ASCII | CT_CNTRL,
-    /* ht  */ CT_ASCII | CT_CNTRL | CT_SPACE | CT_WHITESPACE,
-    /* nl  */ CT_ASCII | CT_CNTRL | CT_SPACE,
-    /* vt  */ CT_ASCII | CT_CNTRL | CT_SPACE,
-    /* np  */ CT_ASCII | CT_CNTRL | CT_SPACE,
-    /* cr  */ CT_ASCII | CT_CNTRL | CT_SPACE,
-    /* so  */ CT_ASCII | CT_CNTRL,
-    /* si  */ CT_ASCII | CT_CNTRL,
-    /* dle */ CT_ASCII | CT_CNTRL,
-    /* dc1 */ CT_ASCII | CT_CNTRL,
-    /* dc2 */ CT_ASCII | CT_CNTRL,
-    /* dc3 */ CT_ASCII | CT_CNTRL,
-    /* dc4 */ CT_ASCII | CT_CNTRL,
-    /* nak */ CT_ASCII | CT_CNTRL,
-    /* syn */ CT_ASCII | CT_CNTRL,
-    /* etb */ CT_ASCII | CT_CNTRL,
-    /* can */ CT_ASCII | CT_CNTRL,
-    /* em  */ CT_ASCII | CT_CNTRL,
-    /* sub */ CT_ASCII | CT_CNTRL,
-    /* esc */ CT_ASCII | CT_CNTRL,
-    /* fs  */ CT_ASCII | CT_CNTRL,
-    /* gs  */ CT_ASCII | CT_CNTRL,
-    /* rs  */ CT_ASCII | CT_CNTRL,
-    /* us  */ CT_ASCII | CT_CNTRL,
-    /* sp  */ CT_ASCII | CT_SPACE | CT_WHITESPACE,
-    /*  !  */ CT_ASCII | CT_PUNCT | CT_TOKEN,
-    /*  "  */ CT_ASCII | CT_PUNCT,
-    /*  #  */ CT_ASCII | CT_PUNCT | CT_TOKEN,
-    /*  $  */ CT_ASCII | CT_PUNCT | CT_TOKEN,
-    /*  %  */ CT_ASCII | CT_PUNCT | CT_TOKEN,
-    /*  &  */ CT_ASCII | CT_PUNCT | CT_TOKEN,
-    /*  '  */ CT_ASCII | CT_PUNCT | CT_TOKEN,
-    /*  (  */ CT_ASCII | CT_PUNCT,
-    /*  )  */ CT_ASCII | CT_PUNCT,
-    /*  *  */ CT_ASCII | CT_PUNCT | CT_TOKEN,
-    /*  +  */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68,
-    /*  ,  */ CT_ASCII | CT_PUNCT,
-    /*  -  */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68,
-    /*  .  */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68,
-    /*  /  */ CT_ASCII | CT_PUNCT | CT_TOKEN68,
-    /*  0  */ CT_ASCII | CT_DIGIT,
-    /*  1  */ CT_ASCII | CT_DIGIT,
-    /*  2  */ CT_ASCII | CT_DIGIT,
-    /*  3  */ CT_ASCII | CT_DIGIT,
-    /*  4  */ CT_ASCII | CT_DIGIT,
-    /*  5  */ CT_ASCII | CT_DIGIT,
-    /*  6  */ CT_ASCII | CT_DIGIT,
-    /*  7  */ CT_ASCII | CT_DIGIT,
-    /*  8  */ CT_ASCII | CT_DIGIT,
-    /*  9  */ CT_ASCII | CT_DIGIT,
-    /*  :  */ CT_ASCII | CT_PUNCT,
-    /*  ;  */ CT_ASCII | CT_PUNCT,
-    /*  <  */ CT_ASCII | CT_PUNCT,
-    /*  =  */ CT_ASCII | CT_PUNCT,
-    /*  >  */ CT_ASCII | CT_PUNCT,
-    /*  ?  */ CT_ASCII | CT_PUNCT,
-    /*  @  */ CT_ASCII | CT_PUNCT,
-    /*  A  */ CT_ASCII | CT_UPPER | CT_XALPHA,
-    /*  B  */ CT_ASCII | CT_UPPER | CT_XALPHA,
-    /*  C  */ CT_ASCII | CT_UPPER | CT_XALPHA,
-    /*  D  */ CT_ASCII | CT_UPPER | CT_XALPHA,
-    /*  E  */ CT_ASCII | CT_UPPER | CT_XALPHA,
-    /*  F  */ CT_ASCII | CT_UPPER | CT_XALPHA,
-    /*  G  */ CT_ASCII | CT_UPPER,
-    /*  H  */ CT_ASCII | CT_UPPER,
-    /*  I  */ CT_ASCII | CT_UPPER,
-    /*  J  */ CT_ASCII | CT_UPPER,
-    /*  K  */ CT_ASCII | CT_UPPER,
-    /*  L  */ CT_ASCII | CT_UPPER,
-    /*  M  */ CT_ASCII | CT_UPPER,
-    /*  N  */ CT_ASCII | CT_UPPER,
-    /*  O  */ CT_ASCII | CT_UPPER,
-    /*  P  */ CT_ASCII | CT_UPPER,
-    /*  Q  */ CT_ASCII | CT_UPPER,
-    /*  R  */ CT_ASCII | CT_UPPER,
-    /*  S  */ CT_ASCII | CT_UPPER,
-    /*  T  */ CT_ASCII | CT_UPPER,
-    /*  U  */ CT_ASCII | CT_UPPER,
-    /*  V  */ CT_ASCII | CT_UPPER,
-    /*  W  */ CT_ASCII | CT_UPPER,
-    /*  X  */ CT_ASCII | CT_UPPER,
-    /*  Y  */ CT_ASCII | CT_UPPER,
-    /*  Z  */ CT_ASCII | CT_UPPER,
-    /*  [  */ CT_ASCII | CT_PUNCT,
-    /*  \  */ CT_ASCII | CT_PUNCT,
-    /*  ]  */ CT_ASCII | CT_PUNCT,
-    /*  ^  */ CT_ASCII | CT_PUNCT | CT_TOKEN,
-    /*  _  */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68,
-    /*  `  */ CT_ASCII | CT_PUNCT | CT_TOKEN,
-    /*  a  */ CT_ASCII | CT_LOWER | CT_XALPHA,
-    /*  b  */ CT_ASCII | CT_LOWER | CT_XALPHA,
-    /*  c  */ CT_ASCII | CT_LOWER | CT_XALPHA,
-    /*  d  */ CT_ASCII | CT_LOWER | CT_XALPHA,
-    /*  e  */ CT_ASCII | CT_LOWER | CT_XALPHA,
-    /*  f  */ CT_ASCII | CT_LOWER | CT_XALPHA,
-    /*  g  */ CT_ASCII | CT_LOWER,
-    /*  h  */ CT_ASCII | CT_LOWER,
-    /*  i  */ CT_ASCII | CT_LOWER,
-    /*  j  */ CT_ASCII | CT_LOWER,
-    /*  k  */ CT_ASCII | CT_LOWER,
-    /*  l  */ CT_ASCII | CT_LOWER,
-    /*  m  */ CT_ASCII | CT_LOWER,
-    /*  n  */ CT_ASCII | CT_LOWER,
-    /*  o  */ CT_ASCII | CT_LOWER,
-    /*  p  */ CT_ASCII | CT_LOWER,
-    /*  q  */ CT_ASCII | CT_LOWER,
-    /*  r  */ CT_ASCII | CT_LOWER,
-    /*  s  */ CT_ASCII | CT_LOWER,
-    /*  t  */ CT_ASCII | CT_LOWER,
-    /*  u  */ CT_ASCII | CT_LOWER,
-    /*  v  */ CT_ASCII | CT_LOWER,
-    /*  w  */ CT_ASCII | CT_LOWER,
-    /*  x  */ CT_ASCII | CT_LOWER,
-    /*  y  */ CT_ASCII | CT_LOWER,
-    /*  z  */ CT_ASCII | CT_LOWER,
-    /*  {  */ CT_ASCII | CT_PUNCT,
-    /*  |  */ CT_ASCII | CT_PUNCT | CT_TOKEN,
-    /*  }  */ CT_ASCII | CT_PUNCT,
-    /*  ~  */ CT_ASCII | CT_PUNCT | CT_TOKEN | CT_TOKEN68,
-    /* del */ CT_ASCII | CT_CNTRL,
-    /* x80 */ CT_UTF8_CONT,
-    /* x81 */ CT_UTF8_CONT,
-    /* x82 */ CT_UTF8_CONT,
-    /* x83 */ CT_UTF8_CONT,
-    /* x84 */ CT_UTF8_CONT,
-    /* x85 */ CT_UTF8_CONT,
-    /* x86 */ CT_UTF8_CONT,
-    /* x87 */ CT_UTF8_CONT,
-    /* x88 */ CT_UTF8_CONT,
-    /* x89 */ CT_UTF8_CONT,
-    /* x8a */ CT_UTF8_CONT,
-    /* x8b */ CT_UTF8_CONT,
-    /* x8c */ CT_UTF8_CONT,
-    /* x8d */ CT_UTF8_CONT,
-    /* x8e */ CT_UTF8_CONT,
-    /* x8f */ CT_UTF8_CONT,
-    /* x90 */ CT_UTF8_CONT,
-    /* x91 */ CT_UTF8_CONT,
-    /* x92 */ CT_UTF8_CONT,
-    /* x93 */ CT_UTF8_CONT,
-    /* x94 */ CT_UTF8_CONT,
-    /* x95 */ CT_UTF8_CONT,
-    /* x96 */ CT_UTF8_CONT,
-    /* x97 */ CT_UTF8_CONT,
-    /* x98 */ CT_UTF8_CONT,
-    /* x99 */ CT_UTF8_CONT,
-    /* x9a */ CT_UTF8_CONT,
-    /* x9b */ CT_UTF8_CONT,
-    /* x9c */ CT_UTF8_CONT,
-    /* x9d */ CT_UTF8_CONT,
-    /* x9e */ CT_UTF8_CONT,
-    /* x9f */ CT_UTF8_CONT,
-    /* xa0 */ CT_UTF8_CONT,
-    /* xa1 */ CT_UTF8_CONT,
-    /* xa2 */ CT_UTF8_CONT,
-    /* xa3 */ CT_UTF8_CONT,
-    /* xa4 */ CT_UTF8_CONT,
-    /* xa5 */ CT_UTF8_CONT,
-    /* xa6 */ CT_UTF8_CONT,
-    /* xa7 */ CT_UTF8_CONT,
-    /* xa8 */ CT_UTF8_CONT,
-    /* xa9 */ CT_UTF8_CONT,
-    /* xaa */ CT_UTF8_CONT,
-    /* xab */ CT_UTF8_CONT,
-    /* xac */ CT_UTF8_CONT,
-    /* xad */ CT_UTF8_CONT,
-    /* xae */ CT_UTF8_CONT,
-    /* xaf */ CT_UTF8_CONT,
-    /* xb0 */ CT_UTF8_CONT,
-    /* xb1 */ CT_UTF8_CONT,
-    /* xb2 */ CT_UTF8_CONT,
-    /* xb3 */ CT_UTF8_CONT,
-    /* xb4 */ CT_UTF8_CONT,
-    /* xb5 */ CT_UTF8_CONT,
-    /* xb6 */ CT_UTF8_CONT,
-    /* xb7 */ CT_UTF8_CONT,
-    /* xb8 */ CT_UTF8_CONT,
-    /* xb9 */ CT_UTF8_CONT,
-    /* xba */ CT_UTF8_CONT,
-    /* xbb */ CT_UTF8_CONT,
-    /* xbc */ CT_UTF8_CONT,
-    /* xbd */ CT_UTF8_CONT,
-    /* xc5 */ CT_UTF8_LEAD,
-    /* xc6 */ CT_UTF8_LEAD,
-    /* xc7 */ CT_UTF8_LEAD,
-    /* xc8 */ CT_UTF8_LEAD,
-    /* xc9 */ CT_UTF8_LEAD,
-    /* xca */ CT_UTF8_LEAD,
-    /* xcb */ CT_UTF8_LEAD,
-    /* xcc */ CT_UTF8_LEAD,
-    /* xcd */ CT_UTF8_LEAD,
-    /* xce */ CT_UTF8_LEAD,
-    /* xcf */ CT_UTF8_LEAD,
-    /* xd0 */ CT_UTF8_LEAD,
-    /* xd1 */ CT_UTF8_LEAD,
-    /* xd2 */ CT_UTF8_LEAD,
-    /* xd3 */ CT_UTF8_LEAD,
-    /* xd4 */ CT_UTF8_LEAD,
-    /* xd5 */ CT_UTF8_LEAD,
-    /* xd6 */ CT_UTF8_LEAD,
-    /* xd7 */ CT_UTF8_LEAD,
-    /* xd8 */ CT_UTF8_LEAD,
-    /* xd9 */ CT_UTF8_LEAD,
-    /* xda */ CT_UTF8_LEAD,
-    /* xdb */ CT_UTF8_LEAD,
-    /* xdc */ CT_UTF8_LEAD,
-    /* xdd */ CT_UTF8_LEAD,
-    /* xde */ CT_UTF8_LEAD,
-    /* xdf */ CT_UTF8_LEAD,
-    /* xe0 */ 0,
-    /* xe1 */ CT_UTF8_LEAD,
-    /* xe2 */ CT_UTF8_LEAD,
-    /* xe3 */ CT_UTF8_LEAD,
-    /* xe4 */ CT_UTF8_LEAD,
-    /* xe5 */ CT_UTF8_LEAD,
-    /* xe6 */ CT_UTF8_LEAD,
-    /* xe7 */ CT_UTF8_LEAD,
-    /* xe8 */ CT_UTF8_LEAD,
-    /* xe9 */ CT_UTF8_LEAD,
-    /* xea */ CT_UTF8_LEAD,
-    /* xeb */ CT_UTF8_LEAD,
-    /* xec */ CT_UTF8_LEAD,
-    /* xed */ CT_UTF8_LEAD,
-    /* xee */ CT_UTF8_LEAD,
-    /* xef */ CT_UTF8_LEAD,
-    /* xf0 */ 0,
-    /* xf1 */ CT_UTF8_LEAD,
-    /* xf2 */ CT_UTF8_LEAD,
-    /* xf3 */ CT_UTF8_LEAD,
-    /* xf4 */ CT_UTF8_LEAD,
-    /* xf5 */ CT_UTF8_LEAD,
-    /* xf6 */ CT_UTF8_LEAD,
-    /* xf7 */ CT_UTF8_LEAD,
-    /* xf8 */ 0,
-    /* xf9 */ CT_UTF8_LEAD,
-    /* xfa */ CT_UTF8_LEAD,
-    /* xfb */ CT_UTF8_LEAD,
-    /* xfc */ 0,
-    /* xfd */ CT_UTF8_LEAD,
-    /* xfe */ 0,
-    /* xff */ 0
-};
-
-/* Character class lookup */
-static APR_INLINE int ct_isspace(char c)
-{
-    return char_table[0xff & (unsigned char)c] & CT_WHITESPACE;
-}
-
-static APR_INLINE int ct_istoken(char c)
-{
-    static const apr_uint32_t ct = CT_TOKEN | CT_DIGIT | CT_UPPER | CT_LOWER;
-    return ct & char_table[0xff & (unsigned char)c];
-}
-
 static APR_INLINE int ct_istoken68(char c)
 {
-    static const apr_uint32_t ct = CT_TOKEN68 | CT_DIGIT | CT_UPPER | CT_LOWER;
-    return ct & char_table[0xff & (unsigned char)c];
+    static const char punct[] = "-._~+/";
+    return c && (ct_isalnum(c) || strchr(punct, c));
 }
 
+/*******************************************************************/
+/* Syntactic elements. */
+
 /* Fold ASCII to lowercase. */
 static APR_INLINE char ct_tolower(char c)
 {
@@ -346,14 +80,14 @@ static APR_INLINE char ct_tolower(char c
     return c;
 }
 
-
+/* Skip spaces. */
 static const char *skip_space(const char *src)
 {
-    while (ct_isspace(*src))
-        ++src;
-    return src;
+    /* In HTTP land, only tab and space count as whitespace. */
+    return src + strspn(src, " \t");
 }
 
+/* Skip token68 */
 static const char *skip_token68(const char *src)
 {
     while (ct_istoken68(*src))
@@ -428,6 +162,8 @@ static const char *copy_token(char **dst
     return src;
 }
 
+/*******************************************************************/
+/* Internal API */
 
 apr_hash_t *serf__parse_authn_parameters(const char *attrs, apr_pool_t *pool)
 {

Reply via email to