Hi,
  attached patch is from branch lp:~squid/squid/characterset/

It extracts some of the work done in parser-ng to implement Alex'
Tokenizer API, bringing O(N) find-any{,-not}-of matching to SBuf.

Note: in IRC discussions, Amos mentioned find_first_of and
find_first_not_of not being naming convention compliant (that's right,
they are modeled after std::string). I'm fine with changing them as a
follow-up commit if this is the collegial decision; I'd avoid do to it
as part of this merge.

-- 
    /kinkie
# Bazaar merge directive format 2 (Bazaar 0.90)
# revision_id: kin...@squid-cache.org-20131215114707-rqo205ert6ix8jwp
# target_branch: ../trunk/
# testament_sha1: 2b205c8cc59e789afd02c0f28c60e293d4cd0fab
# timestamp: 2013-12-15 12:47:44 +0100
# base_revision_id: kin...@squid-cache.org-20131215092850-\
#   zm096u228wthxqiw
# 
# Begin patch
=== modified file 'src/Makefile.am'
--- src/Makefile.am	2013-12-05 15:26:22 +0000
+++ src/Makefile.am	2013-12-15 11:47:07 +0000
@@ -15,6 +15,7 @@
 	DnsLookupDetails.cc
 
 SBUF_SOURCE= \
+	base/CharacterSet.h \
 	base/InstanceId.h \
 	MemBlob.h \
 	MemBlob.cc \

=== modified file 'src/SBuf.cc'
--- src/SBuf.cc	2013-11-27 15:52:03 +0000
+++ src/SBuf.cc	2013-12-15 11:47:07 +0000
@@ -688,12 +688,8 @@
 }
 
 SBuf::size_type
-SBuf::find_first_of(const SBuf &set, size_type startPos) const
+SBuf::find_first_of(const CharacterSet &set, size_type startPos) const
 {
-    // if set is 1 char big, use the char search. Stats updated there
-    if (set.length() == 1)
-        return find(set[0], startPos);
-
     ++stats.find;
 
     if (startPos == npos)
@@ -702,20 +698,41 @@
     if (startPos >= length())
         return npos;
 
-    if (set.length() == 0)
-        return npos;
-
-    debugs(24, 7, "any of '" << set << "' " << " in id " << id);
-    char *cur = buf()+startPos, *end = bufEnd();
+    debugs(24, 7, "first of characterset " << set.name << " in id " << id);
+    char *cur = buf()+startPos;
+    const char *end = bufEnd();
     while (cur < end) {
-        if (memchr(set.buf(), *cur, set.length()))
-            return (cur-buf());
+        if (set[*cur])
+            return cur-buf();
         ++cur;
     }
     debugs(24, 7, "not found");
     return npos;
 }
 
+SBuf::size_type
+SBuf::find_first_not_of(const CharacterSet &set, size_type startPos) const
+{
+    ++stats.find;
+
+    if (startPos == npos)
+        return npos;
+
+    if (startPos >= length())
+        return npos;
+
+    debugs(24, 7, "first not of characterset " << set.name << " in id " << id);
+    char *cur = buf()+startPos;
+    const char *end = bufEnd();
+    while (cur < end) {
+        if (!set[*cur])
+            return cur-buf();
+        ++cur;
+    }
+    debugs(24, 7, "not found");
+    return length();
+}
+
 /*
  * TODO: borrow a sscanf implementation from Linux or similar?
  * we'd really need a vsnscanf(3)... ? As an alternative, a

=== modified file 'src/SBuf.h'
--- src/SBuf.h	2013-12-04 18:37:08 +0000
+++ src/SBuf.h	2013-12-15 11:47:07 +0000
@@ -29,6 +29,7 @@
 #ifndef SQUID_SBUF_H
 #define SQUID_SBUF_H
 
+#include "base/CharacterSet.h"
 #include "base/InstanceId.h"
 #include "MemBlob.h"
 #include "SBufExceptions.h"
@@ -513,7 +514,15 @@
      * \param startPos if specified, ignore any occurrences before that position
      *   if npos, then npos is always returned
      */
-    size_type find_first_of(const SBuf &set, size_type startPos = 0) const;
+    size_type find_first_of(const CharacterSet &set, size_type startPos = 0) const;
+
+    /** Find first occurrence character NOT in character set
+     *
+     * \return length() if all characters in the SBuf are from set
+     * \param startPos if specified, ignore any occurrences before that position
+     *   if npos, then npos is always returned
+     */
+    size_type find_first_not_of(const CharacterSet &set, size_type startPos = 0) const;
 
     /** sscanf-alike
      *

=== added file 'src/base/CharacterSet.h'
--- src/base/CharacterSet.h	1970-01-01 00:00:00 +0000
+++ src/base/CharacterSet.h	2013-12-15 11:47:07 +0000
@@ -0,0 +1,44 @@
+#ifndef _SQUID_SRC_PARSER_CHARACTERSET_H
+#define _SQUID_SRC_PARSER_CHARACTERSET_H
+
+#include <vector>
+
+class CharacterSet
+{
+public:
+    CharacterSet(const char *label, const char * const c) : name(label), chars_(std::vector<bool>(256,false)) {
+        size_t clen = strlen(c);
+        for (size_t i = 0; i < clen; ++i)
+            chars_[static_cast<uint8_t>(c[i])] = true;
+    }
+
+    /// whether a given character exists in the set
+    bool operator[](unsigned char c) const {return chars_[static_cast<uint8_t>(c)];}
+
+    /// add a given char to the character set. c must be >= 0.
+    CharacterSet & add(const unsigned char c) {chars_[static_cast<uint8_t>(c)] = true; return *this; }
+
+    /// add all characters from the given CharacterSet to this one
+    const CharacterSet &operator +=(const CharacterSet &src) {
+        //precondition: src.chars_.size() == chars_.size()
+        std::vector<bool>::const_iterator s = src.chars_.begin();
+        const std::vector<bool>::const_iterator e = src.chars_.end();
+        std::vector<bool>::iterator d = chars_.begin();
+        while (s != e) {
+            if (*s)
+                *d = true;
+            ++s;
+            ++d;
+        }
+        return *this;
+    }
+
+    /// name of this character set
+    const char * name;
+
+private:
+    /// characters defined in this set
+    std::vector<bool> chars_; //std::vector<bool> is optimized
+};
+
+#endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */

=== modified file 'src/base/Makefile.am'
--- src/base/Makefile.am	2013-11-26 09:43:29 +0000
+++ src/base/Makefile.am	2013-12-15 11:47:07 +0000
@@ -12,6 +12,7 @@
 	AsyncJobCalls.h \
 	AsyncCallQueue.cc \
 	AsyncCallQueue.h \
+	CharacterSet.h \
 	TidyPointer.h \
 	CbcPointer.h \
 	InstanceId.h \

=== modified file 'src/icmp/Makefile.am'
--- src/icmp/Makefile.am	2013-12-03 07:49:13 +0000
+++ src/icmp/Makefile.am	2013-12-15 11:47:07 +0000
@@ -23,6 +23,7 @@
 noinst_LTLIBRARIES = libicmp-core.la libicmp.la
 
 SBUF_SOURCE= \
+	$(top_srcdir)/src/base/CharacterSet.h \
 	$(top_srcdir)/src/SBuf.h \
 	$(top_srcdir)/src/SBuf.cc \
 	$(top_srcdir)/src/MemBlob.h \

=== modified file 'src/tests/SBufFindTest.cc'
--- src/tests/SBufFindTest.cc	2013-11-11 12:06:11 +0000
+++ src/tests/SBufFindTest.cc	2013-12-15 11:47:07 +0000
@@ -105,7 +105,7 @@
 {
     theFindString = theStringHay.find_first_of(theStringNeedle, thePos);
     theBareNeedlePos = theStringHay.find_first_of(theStringNeedle);
-    theFindSBuf = theSBufHay.find_first_of(theSBufNeedle, thePos);
+    theFindSBuf = theSBufHay.find_first_of(CharacterSet("cs",theSBufNeedle.c_str()), thePos);
     checkResults("find_first_of");
 }
 

=== modified file 'src/tests/testSBuf.cc'
--- src/tests/testSBuf.cc	2013-11-27 15:52:03 +0000
+++ src/tests/testSBuf.cc	2013-12-15 11:47:07 +0000
@@ -759,23 +759,47 @@
     SBuf::size_type idx;
 
     // not found
-    idx=haystack.find_first_of(SBuf("ADHRWYP"));
+    idx=haystack.find_first_of(CharacterSet("t1","ADHRWYP"));
     CPPUNIT_ASSERT_EQUAL(SBuf::npos,idx);
 
     // found at beginning
-    idx=haystack.find_first_of(SBuf("THANDF"));
+    idx=haystack.find_first_of(CharacterSet("t2","THANDF"));
     CPPUNIT_ASSERT_EQUAL(0U,idx);
 
     //found at end of haystack
-    idx=haystack.find_first_of(SBuf("QWERYVg"));
+    idx=haystack.find_first_of(CharacterSet("t3","QWERYVg"));
     CPPUNIT_ASSERT_EQUAL(haystack.length()-1,idx);
 
     //found in the middle of haystack
-    idx=haystack.find_first_of(SBuf("QWERqYV"));
+    idx=haystack.find_first_of(CharacterSet("t4","QWERqYV"));
     CPPUNIT_ASSERT_EQUAL(4U,idx);
 }
 
 void
+testSBuf::testFindFirstNotOf()
+{
+    SBuf haystack(literal);
+    SBuf::size_type idx;
+
+    // all chars from the set
+    idx=haystack.find_first_not_of(CharacterSet("t1",literal.c_str()));
+    CPPUNIT_ASSERT_EQUAL(haystack.length(),idx);
+
+    // found at beginning
+    idx=haystack.find_first_not_of(CharacterSet("t2","a"));
+    CPPUNIT_ASSERT_EQUAL(0U,idx);
+
+    //found at end of haystack
+    idx=haystack.find_first_not_of(CharacterSet("t3",literal.substr(0,literal.length()-1).c_str()));
+    CPPUNIT_ASSERT_EQUAL(haystack.length()-1,idx);
+
+    //found in the middle of haystack
+    idx=haystack.find_first_not_of(CharacterSet("t4","The"));
+    CPPUNIT_ASSERT_EQUAL(3U,idx);
+}
+
+
+void
 testSBuf::testAutoFind()
 {
     SBufFindTest test;

=== modified file 'src/tests/testSBuf.h'
--- src/tests/testSBuf.h	2013-07-26 09:20:09 +0000
+++ src/tests/testSBuf.h	2013-12-15 11:47:07 +0000
@@ -35,6 +35,7 @@
     CPPUNIT_TEST( testRFindChar );
     CPPUNIT_TEST( testRFindSBuf );
     CPPUNIT_TEST( testFindFirstOf );
+    CPPUNIT_TEST( testFindFirstNotOf );
     CPPUNIT_TEST( testPrintf );
     CPPUNIT_TEST( testScanf );
     CPPUNIT_TEST( testCopy );
@@ -79,6 +80,7 @@
     void testStartsWith();
     void testSBufStream();
     void testFindFirstOf();
+    void testFindFirstNotOf();
     void testAutoFind();
     void testStdStringOps();
 };

# Begin bundle
IyBCYXphYXIgcmV2aXNpb24gYnVuZGxlIHY0CiMKQlpoOTFBWSZTWVHCPn4ACPLfgGR0fX///39n
/66////6YA79p9PHO3AeuS9FFUClAx3zl3vek6o665N7m6mqNGWstVQkklT8Jgin6m0ZEeo9KZlG
R6BlHqbU0BtJoAAZBKmiZMRpoKPSbKhpk2SabUAAaaAGgAAA0Jk1NT1TQMRkZA000AAAAAAAABIk
RomhJmAKehPSZTY0pskyPFNpBkyaNqNBoaDakhMEzVA0NAGgNBoAAAAADagBJEE0AEACahpk0nkS
fqjJmiaANDTQGjCUSQsrBFFC+Z4kwYtufgNerflG/xlj9LTY8kj7oWIHpR3z1XHBWuc/oMtKI1/5
FHEgMB3DQ2i1DmWspjzT9/eXBWUz7aVEwj4vl0TwseXByknNDhrXgx34YWaoiIz9GOKV0z3pUy22
0AUMSLazSxsi9eK6KIQYeEaqpzlW/KbwrgYVyhqvbI06oQJSBINb/dErPKYvpUkvgSxOP6aZnaNJ
hiccSDvA0AMbGwbTbbY2hsX1Kj/cQXasFJZnGlmp6nVEKZjEVGMWTWkSY8oCAdYSk1Bh5Kyhlk6N
OLg6DQ6xUowJzQ2MpGIKjyjOzCrhCoBBLARRYKC1cPwY5RiYrAnjdtUnX0JRCaK1sbEGhNHNRNTx
QM1JAyDCnQlKLh9iImdRqpjv3ltK1NLJJB72SFFiJw/22lsBPqXK4GyKx1muFtnvzKGIibUcEjru
2YxUFtL71jNpCVRLQm6xBPZGAiM2aD/c201hfgZUKmLY/DC4QSy6UcadBzZqc4kdSuQTAr31rMyb
dxEuZ0KASkl13dqsjM8YkVqm3EM8qCq7JzcVJWWfn6+rvs18F67d3m5S2agDe4IBjTTMzM7IGJNi
gBpdKOuaXK9aDczPScKkQt6BvxQ5Uq9vgXNFiCklAYbr1tB3c8I/j1dPJhx9UyWyfjciOIlGGzBK
cNcwpkHRELfzf8rt2orBsKXFs1c1Zhkdp1M6LhuGfyeqFazu9UP1R+Abo+lc6lixcASyZBdsvjoc
9TOzw0Ijw2u02hqTN9ecrUoKDKjlGMZ5vmMCQxI7gyjsLyZiwGMAMpWbeorYeYlviHLnmEil1dSi
GuQOpQcJjrezwshipMQYtoYWE0ViZFrg7C0ALCrlOVLZxQNtjS45CKlh0mcMZIIqLLN3NXZt5nO5
COTByZKphJmCE+SaMTvmnJJLmqvv6FYVzQkOXWlwxRQ5kqUV8uESO60ILlXQPSzrOBUKi0d+UJIi
2C0siQkVFcDmQ6kzCmMlJFMJCwLOSqORfKxIrWrW54aZHcMRLzB3H9nI9nYnkTuX1uw4Bpsc2sOY
zFd9Knop+sWMQvdAvgVXHuWK0IYGo24kztzmQeIPUsBIrtkcyy2BK7VASWF1bHA539kellerGqqO
/M3mM0JGKu3MbtFoXpDJSaAay2oSLz0bJ2pCadhDQ2G8snkIoTHIwMhx49ZVmjmBziTGKTRkqITU
DYEBxAgRHiFkCeJuErFPugpTbsl4zhlXyNJQAmMpMgFYRLhrvASG823mo2FlyokLjmK3KSgqDk5/
grcloRmR9K2kNIjPGVRcTttpzxjHBiRiWZHo2JysUjFZrGthGFpXxwhMYFWPKhrUGCG6TKDA0kBq
GRxlQ2y65YVEqsxhVJaE1AR2DSoW8VEJl44ktLvcMVleJNai2QhZFxJ222a7lPENG80sN2qMq2Eo
vGOESgcVSMxZQjqObEzebByRqDt1xxe6eAztllljOlZOveTWZUMi8dIuyIxGIpbR3gzHK0wJSN5Y
VGKiYzMm6ZyTxlASJFZzIEFbrOOZcY6CKFoioRcTLDXE20iTImmBE9a6MsyWrTXhFzHKcSOcDAVS
JqRWWEogLMe+6NxrhBR6Gky9S3DlL6ZVoNkaGQ8mEM6oW0a+AbemGq0tLhy4qqtEiQiJdwtOmKdC
+cxEZRKiM9AgoakXKa5LcLroMoG7gslEkIfg5skauhMpXIbI0CsbbtSKifrevIgbhIrLPx2quksd
B2sHjBtWK2kxIpq0CUysegkZFkKy1dMTMZAgFBTrjjMiFxGkYPN+Zp0uZB6piGQg4xW0qacOCRtG
zNRWZFReSI8ZEzqYOumOett17ab6r1rl2qlCqyEovBifUckc1xLCqSLprCS2DnOCtNoem7eY0pYR
KrIcOxUJaFZAxHHrXyiRiUZfktNxXEGvKOG8gXmLkid7CLylRxJwsZK1jaXDA+QxGzTbcYF2Ai9Y
KxTNrHMiRpQwHWJsMDQ5FQ5I0qJ2x063Es3R9BmmZWiznbN8/QQTLVuO6TEY5CKoSHY5iKiwSpFk
QLPo2rjwv3SKmlkaUxjjNJFc5yMfVuTq6/YZxgxXhfcIi+mtDF3e+USSzVXJQ2DA8BDY9xAxEho7
KzEBw6Ep2LVImUBmjADdP4rnQHYHWsQQrAJmGEzMze/2klCcYvYnKYGDeEnO7b8fBVJdKZ0u8oS/
GGc4uKqaySQ45bmJaiWtZHijeleDQiorUEbH7XJ0cERSxY3hmry9OmUEuLI0w3VUZuMWIwjYWBxF
gK4LexAvwnkp18ArCifGPxdn4pa0bByvn9b6i93HmEK4vOJc8GuFpKYYMEkLRBETlgChYraTkOSY
xgmJFX6QIPXRVHnTAuNGn7DrJXkjANByDzwniKxhSUKaJyHKDTzlZXA2ERZynrid0wIEAAUlLJFg
kGBXbxtTkpOUmyqMF2rqFeeC3BbGh/VB0EtIET+IjogyZJBsiyEOkTQdBV1K7CQOm2OiUqG0TQoW
z1YSstnLw7bseOMYhISsuHgNet3XKEZwDiIIjCYyYuvV5AjHWDQmXV1C6XnzI7gHGPBeHrPpEBLm
tHPU3606iJg2EjI3jHYTIokXGw46VGEjcQN54Yst6xLxjYAH51zJILD3lC4x82lnkeDOaxgHDlyy
MLbb+T7/CAGdiVnSRcQ61llJl/BDjp/taCkYc/DASCtKr+5Q+tzYZPBQZulK5nKTIGNt1MJOCTAC
RixaS8N2kzkVHeQ6nwEXJFqRtoQvuPA94o6rUgcEYEi88TasTtKek1fmOxVl1D8OFuotJ9BGp9+z
u+OqRB0J6p4nQe7PrV33wogrIwdoVuQCL/IRr63jFBDgAWpag4iU7zqGeJkLMDjNhQey+HW5pEp4
5ETeKYHbmYdCZ7e6fLtWpA6D0ABRYMTZD8gzQXAohE5hISC+qzCU2aErZLtjxa1FW1MVXuvLuNLd
IV9EDG0L+rmq/c1/2Oeo5cW3p3MDyO/428KyYpiPRWoSMd5hi1wJLdaeuAOMSJb5v3dtYENKgvax
iBg3pIRCV/AIuEMFvUSCy8j3PjxRzyYM4/d5TkmaKEwLmSk54+o8T1neRJD0SHP+d3gbcjoXlZQ9
owjcZ9/l5OTLhjaIyG+dBgg80GNBmQ0bM5DnTj5ESvtqSXHJk0xQkbahsLJ7mLThQpbucAWLb4Hw
iI69JQRF8TowAz0MiupEhMJfegTSCdi+AHYeOYttYAQGSyDkTR0NaxCu8r65/QPHzHH7NNtajLsr
uGzOIDg2JBwwucKuwDUAGWbLrZ8vwphGPTgtH+ckBBIngMxCfucFN0MkHPdEAJ7F2+PqquwGYRna
i5MksMRmXgC5bAkxkceOyhehWiRAhD9xhGceNHIJG2JedN4fGg6ABkCzQeZ7BDIH58Ksl7PVBmTA
5kkRgEBdfhjE80jiyBmQUZB5S5N1PNNd9N3isQ7suTbOy4ipsaDtVSwrxkGfQ23urr4bRLNqyl8y
bJcgjKrOv9Ngj7LKva2JbYDOmGHYcTwO1R2wiwjmWApF8sIM5KUSf6kEkAeKLJDd7mIlMgYtHHBA
EuF3ckyPIvLEIsomQmZIZWCMJCMTDbWMeIkWkMlfBDCW7eY+0YXGpWe3MSezm6EdyU7/vnBsogLe
+gAMu2K+1DjJkw0j9qZLKYWAu4VDA5A7i3yCbt4HIGSOQjiSOwCzPLGueDItM0X1+Puxsen2dwj3
rkJFeQgKSSaC9hAgO7un5B6eJM986DDkiNEooEzlBIHBQB/k9MEUQSk2u0uIoR82v+XwOz4SJbtz
iLgAw8sMIrRBzcspwKlNIkVlpvNiGjMhDEiEDuvJr0MwMy0reVTdIkcL8nSp/Ww5KBsK5hT7XeEV
32cXmgh2bA+TQXLQVZ7gdxjX/uGXxX76xHFBrNAWv3kUG9NDaNdDoJRFbPdwAPmkv1J9VzYa9oDd
YVSZQTEU6gmUB0l1QoUNBl0PqTa7pS2GYLBe2PC1LPXp4JibEb0JNitjVElEomxpLE4TSMgHoyCR
UtdaKWqBWqtBCtKIIBTbbbbbOgQOgCoeMuqOjIFkkwHYaZtkeDLczNAQerfE5LExraZlwHKqL7Km
czUHMXDk3ogpSasFUmD2DPUnCNzh2SSLfl0EfP2AdVisjO3pxMF/VdoMMDDAwmGTEwylEB0rPWFW
CgRkn2YgHhDHzVkfeu/h2CwdckLzvBe4R4d1x9CYRx3WJjXPy65qYSOJWBcEhkFO9VRsPSnJBXbh
pHZWuKZbkgEhKoF6TQhkYshHejDr5n1HQ4gzDH5PyXKP/xdyRThQkFHCPn4=

Reply via email to