Hi,
the attached patch is meant to implement rfc3986, and eventually
supersede rfc1738, which is not API-compatible with.
It implements two methods, templatized to be available to both SBuf
and std::string using the same implementation (called String
hereafter):
String rfc3986_unescape(const String &)
returns a new String () which percent-decodes the argument.
String rfc3986_escape(const String &, const CharacterSet&)
returns a new String which percent-encodes the first argument
according to the characters-to-be-escaped mask passed as second
argument (defaults to "unsafe and ctrls", which is defined the same as
in current rfc1738).
RFC3986 is a small auxiliary class meant to have a namespace-like
behavior for a bunch of constant CharacterSet which implement all
currently defined escape-masks, plus these in RFC 3986.
Unit tests have been adapted from rfc1738's and behavior matches
between the two implementations.
Unscientific performance testing (the code is in a disabled-by-default
unit test for those wishing to check the method) shows that the
rfc3986 implementation is faster than rfc1738's both in escaping and
unescaping by about 50%, except probably in the case where a fully
in-place unescaping is possible (this has not been tested).
This patch requires the SBuf API patch and the CharacterSet patch I
recently posted for review.
The feature-branch is at lp:~kinkie/squid/rfc3986
Thanks
--
Francesco
=== added file 'include/rfc3986.h'
--- include/rfc3986.h 1970-01-01 00:00:00 +0000
+++ include/rfc3986.h 2015-12-27 18:34:58 +0000
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#ifndef SQUID_INCLUDE_RFC3986_H_
+#define SQUID_INCLUDE_RFC3986_H_
+
+#include "base/CharacterSet.h"
+
+// duplicate from rfc1738.c
+static int
+fromhex(char ch)
+{
+ if (ch >= '0' && ch <= '9')
+ return ch - '0';
+ if (ch >= 'a' && ch <= 'f')
+ return ch - 'a' + 10;
+ if (ch >= 'A' && ch <= 'F')
+ return ch - 'A' + 10;
+ return -1;
+}
+
+// return a static 2-char buffer with a hex representation of argument
+static char*
+tohex(const unsigned char c)
+{
+ static char rv[3];
+ (void) snprintf ( rv, 3, "%02X", c);
+ return rv;
+}
+
+/** unescape a percent-encoded string
+ *
+ * API-compatible with std::string and SBuf
+ */
+template <class String>
+String rfc3986_unescape(const String &s)
+{
+ typename String::size_type pos=s.find('%');
+ if (pos == String::npos)
+ return s;
+ String rv;
+ rv.reserve(s.length());
+ const auto e=s.end();
+ for (auto in = s.begin(); in != e; ++in) {
+ if (*in != '%') { // normal case, copy and continue
+ rv.push_back(*in);
+ continue;
+ }
+ auto ti = in;
+ ++ti;
+ if (ti == e) { // String ends in %
+ rv.push_back(*in);
+ break;
+ }
+ if (*ti == '%') { //double '%' escaping
+ rv.push_back(*in);
+ ++in;
+ continue;
+ }
+ const int v1 = fromhex(*ti);
+ if (v1 < 0) { // decoding failed at first hextdigit
+ rv.push_back(*in);
+ continue;
+ }
+ ++ti;
+ if (ti == e) { // String ends in '%[[:hexdigit:]]'
+ rv.push_back(*in);
+ continue;
+ }
+ const int v2 = fromhex(*ti);
+ if (v2 < 0) { // decoding failed at second hextdigit
+ rv.push_back(*in);
+ continue;
+ }
+ const int x = v1 << 4 | v2;
+ if (x > 0 && x <= 255) {
+ rv.push_back(static_cast<char>(x));
+ ++in; ++in;
+ continue;
+ }
+ rv.push_back(*in);
+ }
+ return rv;
+}
+
+class RFC3986 {
+public:
+ const static CharacterSet
+ Unsafe, // RFC 1738 unsafe set
+ Ctrls, // control characters (\0x00 to \0x1f)
+ UnsafeAndCtrls, // RFC 1738 Unsafe and Ctrls
+ Reserved1738, // RFC 1738 Reserved set
+ GenDelims,// RFC 3986 gen-delims set
+ SubDelims,// RFC 3986 sub-delims set
+ Reserved, // RFC 3986 reserved characters set
+ Unreserved, // RFC 3986 unreserved characters set
+ Unescaped,//ctrls and unsafe except for percent symbol
+ All;
+
+};
+
+template <class String>
+String rfc3986_escape(const String &s, const CharacterSet &escapeChars = RFC3986::UnsafeAndCtrls)
+{
+ String rv;
+ bool didEscape = false;
+ rv.reserve(s.length()*2); //TODO: optimize arbitrary constant
+ for (auto c : s) {
+ if (escapeChars[c]) {
+ rv.push_back('%');
+ char *hex=tohex(c);
+ rv.push_back(hex[0]);
+ rv.push_back(hex[1]);
+ didEscape = true;
+ } else {
+ rv.push_back(c);
+ }
+ }
+ if (didEscape)
+ return rv;
+ else
+ return s;
+}
+
+#endif /* SQUID_INCLUDE_RFC3986_H_ */
=== modified file 'lib/Makefile.am'
--- lib/Makefile.am 2015-08-03 03:50:25 +0000
+++ lib/Makefile.am 2015-12-27 18:34:58 +0000
@@ -42,62 +42,62 @@ EXTRA_DIST += sspwin32.cc
endif
if ENABLE_AUTH_NTLM
SUBDIRS += ntlmauth
endif
#
# dirent.c, encrypt.c and getopt.c are needed for native Windows support.
#
EXTRA_libmiscutil_la_SOURCES = \
dirent.c \
encrypt.c \
getopt.c
libmiscencoding_la_SOURCES = \
base64.c \
charset.c \
html_quote.c \
md5.c \
rfc1738.c \
rfc2617.c \
+ rfc3986.cc \
uudecode.c
libmisccontainers_la_SOURCES = \
hash.cc
libmiscutil_la_SOURCES = \
getfullhostname.c \
heap.c \
iso3307.c \
radix.c \
rfc1123.c \
$(SNPRINTFSOURCE) \
Splay.cc \
stub_memaccount.c \
util.c \
xusleep.c
TESTS += tests/testRFC1738
check_PROGRAMS += tests/testRFC1738
tests_testRFC1738_SOURCES= \
tests/testRFC1738.h \
tests/testRFC1738.cc
tests_testRFC1738_LDADD= \
$(SQUID_CPPUNIT_LA) $(SQUID_CPPUNIT_LIBS) \
$(top_builddir)/lib/libmiscencoding.la \
$(top_builddir)/lib/libmiscutil.la \
$(COMPAT_LIB)
tests_testRFC1738_LDFLAGS = $(LIBADD_DL)
-
## Special Universal .h dependency test script
## aborts if error encountered
testHeaders: $(top_srcdir)/include/*.h
$(SHELL) $(top_srcdir)/test-suite/testheaders.sh "$(CXXCOMPILE)" $^ || exit 1
TESTS += testHeaders
CLEANFILES += testHeaders
.PHONY: testHeaders
=== added file 'lib/rfc3986.cc'
--- lib/rfc3986.cc 1970-01-01 00:00:00 +0000
+++ lib/rfc3986.cc 2015-12-27 18:34:58 +0000
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#include "squid.h"
+#include "rfc3986.h"
+#include "base/CharacterSet.h"
+
+const CharacterSet
+ RFC3986::Unsafe("rfc1738:unsafe", "<>\"# %{}|\\^~[]`'"),
+ RFC3986::Ctrls("rfc1738:ctrls", {{0x00, 0x1f}, {0x7f,0xff}}),
+ RFC3986::Reserved1738("rfc1738:reserved", ";/?:@=&"),
+ RFC3986::UnsafeAndCtrls = Unsafe + Ctrls,
+ RFC3986::GenDelims("rfc3986:gen-delims",":/?#[]@"),
+ RFC3986::SubDelims("rfc3986:sub-delims","!$&'()*+,;="),
+ RFC3986::Reserved = (GenDelims + SubDelims).rename("rfc3986:reserved"),
+ RFC3986::Unreserved = CharacterSet("rfc3986:unreserved","-._~") +
+ CharacterSet::ALPHA + CharacterSet::DIGIT,
+ RFC3986::Unescaped = (UnsafeAndCtrls - CharacterSet(nullptr,"%") ).rename("rfc1738:unescaped"),
+ RFC3986::All = (Unsafe + Reserved + Ctrls).rename("all")
+ ;
=== modified file 'src/Makefile.am'
--- src/Makefile.am 2015-12-23 12:08:55 +0000
+++ src/Makefile.am 2015-12-27 18:34:58 +0000
@@ -3808,31 +3808,52 @@ tests_testLookupTable_LDADD = \
$(XTRA_LIBS)
tests_testLookupTable_DEPENDENCIES = $(SQUID_CPPUNIT_LA)
tests_testEnumIterator_SOURCES = \
base/EnumIterator.h \
tests/stub_debug.cc \
tests/stub_libmem.cc \
tests/testEnumIterator.h \
tests/testEnumIterator.cc
nodist_tests_testEnumIterator_SOURCES = \
$(TESTSOURCES)
tests_testEnumIterator_LDFLAGS = $(LIBADD_DL)
tests_testEnumIterator_LDADD = \
base/libbase.la \
$(SQUID_CPPUNIT_LIBS) \
$(COMPAT_LIB) \
$(SQUID_CPPUNIT_LA) \
$(XTRA_LIBS)
tests_testEnumIterator_DEPENDENCIES =
+check_PROGRAMS += tests/testRFC3986
+tests_testRFC3986_SOURCES = \
+ $(SBUF_SOURCE) \
+ String.cc \
+ tests/stub_debug.cc \
+ tests/stub_libmem.cc \
+ tests/stub_SBufDetailedStats.cc \
+ tests/testRFC3986.h \
+ tests/testRFC3986.cc
+nodist_tests_testRFC3986_SOURCES = \
+ $(TESTSOURCES)
+tests_testRFC3986_LDFLAGS = $(LIBADD_DL)
+tests_testRFC3986_LDADD = \
+ base/libbase.la \
+ $(top_builddir)/lib/libmiscencoding.la \
+ $(SQUID_CPPUNIT_LIBS) \
+ $(COMPAT_LIB) \
+ $(SQUID_CPPUNIT_LA) \
+ $(XTRA_LIBS)
+tests_testRFC3986_DEPENDENCIES =
+
TESTS += testHeaders
## Special Universal .h dependency test script
## aborts if error encountered
testHeaders: $(srcdir)/*.h $(srcdir)/DiskIO/*.h $(srcdir)/DiskIO/*/*.h
$(SHELL) $(top_srcdir)/test-suite/testheaders.sh "$(CXXCOMPILE)" $^ || exit 1
## src/repl/ has no .h files and its own makefile.
CLEANFILES += testHeaders
.PHONY: testHeaders
=== added file 'src/tests/testRFC3986.cc'
--- src/tests/testRFC3986.cc 1970-01-01 00:00:00 +0000
+++ src/tests/testRFC3986.cc 2015-12-27 18:34:58 +0000
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#include "squid.h"
+#include "SBuf.h"
+#include "testRFC3986.h"
+#include "rfc1738.h"
+#include "unitTestMain.h"
+
+#include <cassert>
+
+/* Being a C library code it is best bodily included and tested with C++ type-safe techniques. */
+#include "lib/rfc3986.cc"
+
+CPPUNIT_TEST_SUITE_REGISTRATION( testRFC3986 );
+
+static void
+performDecodingTest(const std::string &encoded_str, const std::string &plaintext_str)
+{
+ std::string decoded_str = rfc3986_unescape(encoded_str);
+ CPPUNIT_ASSERT_EQUAL(plaintext_str, decoded_str);
+
+ SBuf encoded_sbuf(encoded_str);
+ SBuf plaintext_sbuf(plaintext_str);
+ SBuf decoded_sbuf = rfc3986_unescape(encoded_sbuf);
+ CPPUNIT_ASSERT_EQUAL(plaintext_sbuf, decoded_sbuf);
+}
+
+/* Regular Format de-coding tests */
+void testRFC3986::testUrlDecode()
+{
+ performDecodingTest("%2Fdata%2Fsource%2Fpath","/data/source/path");
+ performDecodingTest("http://foo.invalid%2Fdata%2Fsource%2Fpath",
+ "http://foo.invalid/data/source/path");
+ // TODO query string
+
+ performDecodingTest("1 w%0Ard","1 w\nrd"); // Newline %0A encoded
+ performDecodingTest("2 w%rd","2 w%rd"); // Un-encoded %
+ performDecodingTest("3 w%%rd","3 w%rd"); // encoded %
+ performDecodingTest("5 Bad String %1","5 Bad String %1"); // corrupt string
+ performDecodingTest("6 Bad String %1A%3","6 Bad String \032%3"); //partly corrupt string
+ performDecodingTest("7 Good String %1A","7 Good String \032"); // non corrupt string
+ //test various endings
+ performDecodingTest("8 word%","8 word%");
+ performDecodingTest("9 word%z","9 word%z");
+ performDecodingTest("10 word%1","10 word%1");
+ performDecodingTest("11 word%1q","11 word%1q");
+ performDecodingTest("12 word%1a","12 word\032");
+ }
+
+// perform a test for std::string, SBuf and if rfc1738flag is != 0 compare
+// against rfc1738 implementation
+static void
+performEncodingTest(const char *plaintext_str, const char *encoded_str, int rfc1738flag, const CharacterSet &rfc3986CSet)
+{
+ CPPUNIT_ASSERT_EQUAL(std::string(encoded_str), rfc3986_escape(std::string(plaintext_str), rfc3986CSet));
+ CPPUNIT_ASSERT_EQUAL(SBuf(encoded_str), rfc3986_escape(SBuf(plaintext_str), rfc3986CSet));
+ if (!rfc1738flag)
+ return;
+ char *result = rfc1738_do_escape(plaintext_str, rfc1738flag);
+ CPPUNIT_ASSERT_EQUAL(std::string(encoded_str), std::string(result));
+}
+void testRFC3986::testUrlEncode()
+{
+ /* TEST: Escaping only unsafe characters */
+ performEncodingTest("http://foo.invalid/data/source/path",
+ "http://foo.invalid/data/source/path",
+ RFC1738_ESCAPE_UNSAFE, RFC3986::Unsafe);
+
+ /* regular URL (no encoding needed) */
+ performEncodingTest("http://foo.invalid/data/source/path",
+ "http://foo.invalid/data/source/path",
+ RFC1738_ESCAPE_UNSAFE, RFC3986::Unsafe);
+
+ /* long string of unsafe # characters */
+ performEncodingTest("################ ################ ################ ################ ################ ################ ################ ################",
+ "%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%20%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23%23",
+ RFC1738_ESCAPE_UNSAFE, RFC3986::Unsafe);
+
+ /* TEST: escaping only reserved characters */
+
+ /* regular URL (full encoding requested) */
+ performEncodingTest("http://foo.invalid/data/source/path",
+ "http%3A%2F%2Ffoo.invalid%2Fdata%2Fsource%2Fpath",
+ RFC1738_ESCAPE_RESERVED, RFC3986::Reserved);
+
+ /* regular path (encoding wanted for ALL special chars) */
+ performEncodingTest("/data/source/path",
+ "%2Fdata%2Fsource%2Fpath",
+ RFC1738_ESCAPE_RESERVED, RFC3986::Reserved);
+
+ /* TEST: safety-escaping a string already partially escaped */
+
+ /* escaping of dangerous characters in a partially escaped string */
+ performEncodingTest("http://foo.invalid/data%2Fsource[]",
+ "http://foo.invalid/data%2Fsource%5B%5D",
+ RFC1738_ESCAPE_UNESCAPED, RFC3986::Unescaped);
+
+ /* escaping of hexadecimal 0xFF characters in a partially escaped string */
+ performEncodingTest("http://foo.invalid/data%2Fsource\xFF\xFF",
+ "http://foo.invalid/data%2Fsource%FF%FF",
+ RFC1738_ESCAPE_UNESCAPED, RFC3986::Unescaped);
+}
+
+/** SECURITY BUG TESTS: avoid null truncation attacks by skipping %00 bytes */
+void testRFC3986::PercentZeroNullDecoding()
+{
+ /* Attack with %00 encoded NULL */
+ performDecodingTest("w%00rd", "w%00rd");
+
+ /* Attack with %0 encoded NULL */
+ performDecodingTest("w%0rd", "w%0rd");
+
+ /* Handle '0' bytes embeded in encoded % */
+ performDecodingTest("w%%00%rd", "w%00%rd");
+
+ /* Handle NULL bytes with encoded % */
+ performDecodingTest("w%%%00%rd", "w%%00%rd");
+}
+
+void
+testRFC3986::testPerformance()
+{
+
+}
+
+
=== added file 'src/tests/testRFC3986.h'
--- src/tests/testRFC3986.h 1970-01-01 00:00:00 +0000
+++ src/tests/testRFC3986.h 2015-12-27 18:34:58 +0000
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
+ *
+ * Squid software is distributed under GPLv2+ license and includes
+ * contributions from numerous individuals and organizations.
+ * Please see the COPYING and CONTRIBUTORS files for details.
+ */
+
+#ifndef SQUID_LIB_TEST_RFC3986_H
+#define SQUID_LIB_TEST_RFC3986_H
+
+#include <cppunit/extensions/HelperMacros.h>
+
+/**
+ * Test the URL coder RFC 3986 Engine
+ */
+class testRFC3986 : public CPPUNIT_NS::TestFixture
+{
+ CPPUNIT_TEST_SUITE( testRFC3986 );
+ CPPUNIT_TEST( testUrlDecode );
+ CPPUNIT_TEST( testUrlEncode );
+ CPPUNIT_TEST( PercentZeroNullDecoding );
+ CPPUNIT_TEST( testPerformance );
+ CPPUNIT_TEST_SUITE_END();
+
+public:
+
+protected:
+ void testUrlDecode();
+ void testUrlEncode();
+
+ // bugs.
+ void PercentZeroNullDecoding();
+ void testPerformance();
+};
+
+#endif /* SQUID_LIB_TEST_RFC3986_H */
+
_______________________________________________
squid-dev mailing list
[email protected]
http://lists.squid-cache.org/listinfo/squid-dev