Hi,
the attached patch implements all proposals in this thread.
=== modified file 'src/base/CharacterSet.cc'
--- src/base/CharacterSet.cc 2015-01-13 07:25:36 +0000
+++ src/base/CharacterSet.cc 2015-12-30 15:46:39 +0000
@@ -1,55 +1,63 @@
/*
* Copyright (C) 1996-2015 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
* Please see the COPYING and CONTRIBUTORS files for details.
*/
#include "squid.h"
#include "CharacterSet.h"
#include <algorithm>
+#include <iostream>
#include <functional>
CharacterSet &
CharacterSet::operator +=(const CharacterSet &src)
{
Storage::const_iterator s = src.chars_.begin();
const Storage::const_iterator e = src.chars_.end();
Storage::iterator d = chars_.begin();
while (s != e) {
if (*s)
*d = 1;
++s;
++d;
}
return *this;
}
-CharacterSet
-CharacterSet::operator +(const CharacterSet &src) const
+CharacterSet &
+CharacterSet::operator -=(const CharacterSet &src)
{
- CharacterSet rv(*this);
- rv += src;
- return rv;
+ Storage::const_iterator s = src.chars_.begin();
+ const Storage::const_iterator e = src.chars_.end();
+ Storage::iterator d = chars_.begin();
+ while (s != e) {
+ if (*s)
+ *d = 0;
+ ++s;
+ ++d;
+ }
+ return *this;
}
CharacterSet &
CharacterSet::add(const unsigned char c)
{
chars_[static_cast<uint8_t>(c)] = 1;
return *this;
}
CharacterSet &
CharacterSet::addRange(unsigned char low, unsigned char high)
{
//manual loop splitting is needed to cover case where high is 255
// otherwise low will wrap, resulting in infinite loop
while (low < high) {
chars_[static_cast<uint8_t>(low)] = 1;
++low;
}
chars_[static_cast<uint8_t>(high)] = 1;
return *this;
@@ -64,54 +72,74 @@ CharacterSet::complement(const char *lab
std::logical_not<Storage::value_type>());
return result;
}
CharacterSet::CharacterSet(const char *label, const char * const c) :
name(label == NULL ? "anonymous" : label),
chars_(Storage(256,0))
{
const size_t clen = strlen(c);
for (size_t i = 0; i < clen; ++i)
add(c[i]);
}
CharacterSet::CharacterSet(const char *label, unsigned char low, unsigned char high) :
name(label == NULL ? "anonymous" : label),
chars_(Storage(256,0))
{
addRange(low,high);
}
+CharacterSet::CharacterSet(const char *label, std::initializer_list<std::pair<uint8_t, uint8_t>> ranges) :
+ name(label == NULL ? "anonymous" : label),
+ chars_(Storage(256,0))
+{
+ for (auto range: ranges)
+ addRange(range.first, range.second);
+}
+
+CharacterSet
+operator+ (CharacterSet lhs, const CharacterSet &rhs)
+{
+ lhs += rhs;
+ return lhs;
+}
+
+CharacterSet
+operator- (CharacterSet lhs, const CharacterSet &rhs)
+{
+ lhs -= rhs;
+ return lhs;
+}
+
+std::ostream&
+operator <<(std::ostream &s, const CharacterSet &c)
+{
+ s << "CharacterSet(" << c.name << ')';
+ return s;
+}
+
const CharacterSet
// RFC 5234
-CharacterSet::ALPHA("ALPHA", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
- CharacterSet::BIT("BIT","01"),
- CharacterSet::CR("CR","\r"),
-#if __cplusplus == 201103L
-//CharacterSet::CTL("CTL",{{0x01,0x1f},{0x7f,0x7f}}),
-#endif
- CharacterSet::DIGIT("DIGIT","0123456789"),
- CharacterSet::DQUOTE("DQUOTE","\""),
- CharacterSet::HEXDIG("HEXDIG","0123456789aAbBcCdDeEfF"),
- CharacterSet::HTAB("HTAB","\t"),
- CharacterSet::LF("LF","\n"),
- CharacterSet::SP("SP"," "),
- CharacterSet::VCHAR("VCHAR", 0x21, 0x7e),
-// RFC 7230
- CharacterSet::WSP("WSP"," \t"),
-#if __cplusplus == 201103L
-//CharacterSet::CTEXT("ctext",{{0x09,0x09},{0x20,0x20},{0x2a,0x5b},{0x5d,0x7e},{0x80,0xff}}),
-#endif
- CharacterSet::TCHAR("TCHAR","!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
- CharacterSet::SPECIAL("SPECIAL","()<>@,;:\\\"/[]?={}"),
-#if __cplusplus == 201103L
-//CharacterSet::QDTEXT("QDTEXT",{{0x09,0x09},{0x20,0x21},{0x23,0x5b},{0x5d,0x7e},{0x80,0xff}}),
-#endif
- CharacterSet::OBSTEXT("OBSTEXT",0x80,0xff),
-// RFC 7232
-#if __cplusplus == 201103L
-//CharacterSet::ETAGC("ETAGC",{{0x21,0x21},{0x23,0x7e},{0x80,0xff}}),
-#endif
-// RFC 7235
- CharacterSet::TOKEN68C("TOKEN68C","-._~+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
- ;
-
+ CharacterSet::ALPHA("ALPHA", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
+ CharacterSet::BIT("BIT","01"),
+ CharacterSet::CR("CR","\r"),
+ CharacterSet::CTL("CTL",{{0x01,0x1f},{0x7f,0x7f}}),
+ CharacterSet::DIGIT("DIGIT","0123456789"),
+ CharacterSet::DQUOTE("DQUOTE","\""),
+ CharacterSet::HEXDIG("HEXDIG","0123456789aAbBcCdDeEfF"),
+ CharacterSet::HTAB("HTAB","\t"),
+ CharacterSet::LF("LF","\n"),
+ CharacterSet::SP("SP"," "),
+ CharacterSet::VCHAR("VCHAR", 0x21, 0x7e),
+ // RFC 7230
+ CharacterSet::WSP("WSP"," \t"),
+ CharacterSet::CTEXT("ctext",{{0x09,0x09},{0x20,0x20},{0x2a,0x5b},{0x5d,0x7e},{0x80,0xff}}),
+ CharacterSet::TCHAR("TCHAR","!#$%&'*+-.^_`|~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"),
+ CharacterSet::SPECIAL("SPECIAL","()<>@,;:\\\"/[]?={}"),
+ CharacterSet::QDTEXT("QDTEXT",{{0x09,0x09},{0x20,0x21},{0x23,0x5b},{0x5d,0x7e},{0x80,0xff}}),
+ CharacterSet::OBSTEXT("OBSTEXT",0x80,0xff),
+ // RFC 7232
+ CharacterSet::ETAGC("ETAGC",{{0x21,0x21},{0x23,0x7e},{0x80,0xff}}),
+ // RFC 7235
+ CharacterSet::TOKEN68C("TOKEN68C","-._~+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
+ ;
=== modified file 'src/base/CharacterSet.h'
--- src/base/CharacterSet.h 2015-01-13 07:25:36 +0000
+++ src/base/CharacterSet.h 2015-12-30 15:46:39 +0000
@@ -1,123 +1,141 @@
/*
* Copyright (C) 1996-2015 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
* Please see the COPYING and CONTRIBUTORS files for details.
*/
#ifndef _SQUID_SRC_PARSER_CHARACTERSET_H
#define _SQUID_SRC_PARSER_CHARACTERSET_H
+#include <initializer_list>
+#include <iosfwd>
#include <vector>
/// optimized set of C chars, with quick membership test and merge support
class CharacterSet
{
public:
typedef std::vector<uint8_t> Storage;
/// define a character set with the given label ("anonymous" if NULL)
/// with specified initial contents
CharacterSet(const char *label, const char * const initial);
/// define a character set with the given label ("anonymous" if NULL)
/// containing characters defined in the supplied ranges
/// \see addRange
CharacterSet(const char *label, unsigned char low, unsigned char high);
+ /// define a character set with the given label ("anonymous" if NULL)
+ /// containing characters defined in the supplied list of low-high ranges
+ /// \see addRange
+ CharacterSet(const char *label, std::initializer_list<std::pair<uint8_t,uint8_t>> ranges);
+
/// whether a given character exists in the set
bool operator[](unsigned char c) const {return chars_[static_cast<uint8_t>(c)] != 0;}
/// add a given character to the character set
CharacterSet & add(const unsigned char c);
/// add a list of character ranges, expressed as pairs [low,high], including both ends
CharacterSet & addRange(unsigned char low, unsigned char high);
- /// add all characters from the given CharacterSet to this one
- CharacterSet &operator +=(const CharacterSet &src);
+ /// set addition: add to this set all characters that are in rhs
+ CharacterSet &operator +=(const CharacterSet &rhs);
- /// return a new CharacterSet containing the union of two sets
- CharacterSet operator +(const CharacterSet &src) const;
+ /// set subtraction: remove all characters that are also in rhs
+ CharacterSet &operator -=(const CharacterSet &rhs);
/// return a new CharacterSet containing characters not in this set
CharacterSet complement(const char *complementLabel = NULL) const;
/// change name; handy in const declarations that use operators
CharacterSet &rename(const char *label) { name = label; return *this; }
+ /// equality comparison: compares set contents only; the label is ignored
+ bool operator == (const CharacterSet &cs) const { return chars_ == cs.chars_; }
+ bool operator != (const CharacterSet &cs) const { return !operator==(cs); }
+
/// optional set label for debugging (default: "anonymous")
const char * name;
// common character sets, RFC 5234
// A-Za-z
static const CharacterSet ALPHA;
// 0-1
static const CharacterSet BIT;
// carriage return
static const CharacterSet CR;
// controls
-#if __cplusplus == 201103L
- // ready but disabled as needs C++11 constructor
- //static const CharacterSet CTL;
-#endif
+ static const CharacterSet CTL;
// 0-9
static const CharacterSet DIGIT;
// double quote
static const CharacterSet DQUOTE;
// 0-9aAbBcCdDeEfF
static const CharacterSet HEXDIG;
// horizontal tab
static const CharacterSet HTAB;
// line feed
static const CharacterSet LF;
// white space
static const CharacterSet SP;
// visible (printable) characters
static const CharacterSet VCHAR;
// <space><tab>
static const CharacterSet WSP;
// HTTP character sets, RFC 7230
// ctext
-#if __cplusplus == 201103L
- // ready but disabled as needs C++11 constructor
- //static const CharacterSet CTEXT;
-#endif
+ static const CharacterSet CTEXT;
// XXX: maybe field-vchar = VCHAR / obs-text
// any VCHAR except for SPECIAL
static const CharacterSet TCHAR;
// special VCHARs
static const CharacterSet SPECIAL;
// qdtext
-#if __cplusplus == 201103L
- // ready but disabled as needs C++11 constructor
- //static const CharacterSet QDTEXT;
-#endif
+ static const CharacterSet QDTEXT;
// obs-text
static const CharacterSet OBSTEXT;
// HTTP character sets, RFC 7232
// etagc
-#if __cplusplus == 201103L
- // ready but disabled as needs C++11 constructor
- //static const CharacterSet ETAGC;
-#endif
+ static const CharacterSet ETAGC;
// HTTP character sets, RFC 7235
// token68 (internal charaters only, excludes '=' terminator)
static const CharacterSet TOKEN68C;
private:
/** index of characters in this set
*
* \note guaranteed to be always 256 slots big, as forced in the
* constructor. This assumption is relied upon in operator[], add,
* operator+=
*/
Storage chars_;
};
+/** CharacterSet addition
+ *
+ * returns a new CharacterSet containing all characters present in lhs,
+ * in rhs, or in both (set union), labeled as lhs is
+ */
+CharacterSet
+operator+ (CharacterSet lhs, const CharacterSet &rhs);
+
+/** CharacterSet subtraction
+ *
+ * returns a new CharacterSet containing all characters present in lhs
+ * and not present in rhs, labeled as lhs is
+ */
+CharacterSet
+operator- (CharacterSet lhs, const CharacterSet &rhs);
+
+std::ostream&
+operator <<(std::ostream &, const CharacterSet &);
+
#endif /* _SQUID_SRC_PARSER_CHARACTERSET_H */
=== modified file 'src/tests/testCharacterSet.cc'
--- src/tests/testCharacterSet.cc 2015-08-03 02:08:22 +0000
+++ src/tests/testCharacterSet.cc 2015-12-30 15:46:39 +0000
@@ -11,80 +11,112 @@
#include "testCharacterSet.h"
#include "unitTestMain.h"
#include <string>
CPPUNIT_TEST_SUITE_REGISTRATION( testCharacterSet );
void
testCharacterSet::CharacterSetConstruction()
{
{
CharacterSet t(NULL,"");
CPPUNIT_ASSERT_EQUAL(std::string("anonymous"),std::string(t.name));
}
{
CharacterSet t("test","");
CPPUNIT_ASSERT_EQUAL(std::string("test"),std::string(t.name));
}
{
CharacterSet t("test","");
- for (int j = 0; j < 255; ++j)
+ for (int j = 0; j < 256; ++j)
CPPUNIT_ASSERT_EQUAL(false,t[j]);
}
{
CharacterSet t("test","0");
CPPUNIT_ASSERT_EQUAL(true,t['0']);
- for (int j = 0; j < 255; ++j)
- if (j != '0')
+ for (int j = 0; j < 256; ++j) {
+ if (j != '0') {
CPPUNIT_ASSERT_EQUAL(false,t[j]);
+ } else {
+ CPPUNIT_ASSERT_EQUAL(true,t[j]);
+ }
+ }
}
}
void
testCharacterSet::CharacterSetAdd()
{
CharacterSet t("test","0");
t.add(0);
CPPUNIT_ASSERT_EQUAL(true,t['\0']);
CPPUNIT_ASSERT_EQUAL(true,t['0']);
}
void
testCharacterSet::CharacterSetAddRange()
{
CharacterSet t("test","");
t.addRange('0','9');
CPPUNIT_ASSERT_EQUAL(true,t['0']);
CPPUNIT_ASSERT_EQUAL(true,t['5']);
CPPUNIT_ASSERT_EQUAL(true,t['9']);
CPPUNIT_ASSERT_EQUAL(false,t['a']);
}
void
testCharacterSet::CharacterSetConstants()
{
CPPUNIT_ASSERT_EQUAL(true,CharacterSet::ALPHA['a']);
CPPUNIT_ASSERT_EQUAL(true,CharacterSet::ALPHA['z']);
CPPUNIT_ASSERT_EQUAL(true,CharacterSet::ALPHA['A']);
CPPUNIT_ASSERT_EQUAL(true,CharacterSet::ALPHA['Z']);
CPPUNIT_ASSERT_EQUAL(false,CharacterSet::ALPHA['5']);
}
void
testCharacterSet::CharacterSetUnion()
{
{
CharacterSet hex("hex","");
hex += CharacterSet::DIGIT;
hex += CharacterSet(NULL,"aAbBcCdDeEfF");
- for (int j = 0; j < 255; ++j)
+ CPPUNIT_ASSERT_EQUAL(CharacterSet::HEXDIG, hex);
+ for (int j = 0; j < 256; ++j)
CPPUNIT_ASSERT_EQUAL(CharacterSet::HEXDIG[j],hex[j]);
}
{
CharacterSet hex(NULL,"");
hex = CharacterSet::DIGIT + CharacterSet(NULL,"aAbBcCdDeEfF");
- for (int j = 0; j < 255; ++j)
+ for (int j = 0; j < 256; ++j)
CPPUNIT_ASSERT_EQUAL(CharacterSet::HEXDIG[j],hex[j]);
}
}
+void
+testCharacterSet::CharacterSetEqualityOp()
+{
+ CPPUNIT_ASSERT_EQUAL(CharacterSet::ALPHA, CharacterSet::ALPHA);
+ CPPUNIT_ASSERT_EQUAL(CharacterSet::BIT, CharacterSet(NULL,"01"));
+ CPPUNIT_ASSERT_EQUAL(CharacterSet(NULL,"01"), CharacterSet(NULL,"01"));
+ CPPUNIT_ASSERT_EQUAL(CharacterSet(NULL,"01"), CharacterSet("","01"));
+ CPPUNIT_ASSERT_EQUAL(CharacterSet::BIT, CharacterSet("bit",'0','1'));
+ CPPUNIT_ASSERT_EQUAL(CharacterSet::BIT, CharacterSet("bit",{{'0','1'}}));
+ CPPUNIT_ASSERT_EQUAL(CharacterSet::BIT, CharacterSet("bit",{{'0','0'},{'1','1'}}));
+}
+
+void
+testCharacterSet::CharacterSetSubtract()
+{
+ CharacterSet sample(NULL, "0123456789aAbBcCdDeEfFz");
+
+ sample -= CharacterSet(NULL, "z"); //character in set
+ CPPUNIT_ASSERT_EQUAL(CharacterSet::HEXDIG, sample);
+
+ sample -= CharacterSet(NULL, "z"); // character not in set
+ CPPUNIT_ASSERT_EQUAL(CharacterSet::HEXDIG, sample);
+
+ sample += CharacterSet(nullptr, "z");
+ // one in set, one not; test operator-
+ CPPUNIT_ASSERT_EQUAL(CharacterSet::HEXDIG, sample - CharacterSet(NULL, "qz"));
+}
=== modified file 'src/tests/testCharacterSet.h'
--- src/tests/testCharacterSet.h 2015-08-03 02:08:22 +0000
+++ src/tests/testCharacterSet.h 2015-12-27 13:27:20 +0000
@@ -1,33 +1,37 @@
/*
* Copyright (C) 1996-2015 The Squid Software Foundation and contributors
*
* Squid software is distributed under GPLv2+ license and includes
* contributions from numerous individuals and organizations.
* Please see the COPYING and CONTRIBUTORS files for details.
*/
#ifndef SQUID_BASE_TESTCHARACTERSET_H
#define SQUID_BASE_TESTCHARACTERSET_H
#include <cppunit/extensions/HelperMacros.h>
class testCharacterSet : public CPPUNIT_NS::TestFixture
{
CPPUNIT_TEST_SUITE( testCharacterSet );
CPPUNIT_TEST( CharacterSetConstruction );
CPPUNIT_TEST( CharacterSetAdd );
CPPUNIT_TEST( CharacterSetAddRange );
+ CPPUNIT_TEST( CharacterSetEqualityOp );
CPPUNIT_TEST( CharacterSetConstants );
CPPUNIT_TEST( CharacterSetUnion );
+ CPPUNIT_TEST( CharacterSetSubtract );
CPPUNIT_TEST_SUITE_END();
protected:
void CharacterSetConstruction();
void CharacterSetAdd();
void CharacterSetAddRange();
void CharacterSetConstants();
void CharacterSetUnion();
+ void CharacterSetEqualityOp();
+ void CharacterSetSubtract();
};
#endif /* SQUID_BASE_TESTCHARACTERSET_H */
_______________________________________________
squid-dev mailing list
[email protected]
http://lists.squid-cache.org/listinfo/squid-dev