This is an automated email from the ASF dual-hosted git repository. bneradt pushed a commit to branch doc-lexicon in repository https://gitbox.apache.org/repos/asf/trafficserver-libswoc.git
commit 4f8e916f3200252d6a50e7c399d6097b798bfcf4 Author: Alan M. Carroll <[email protected]> AuthorDate: Fri May 22 10:22:46 2020 -0500 Lexicon: Doc updates and example. --- doc/Doxyfile | 17 +--- doc/code/BW_Format.en.rst | 2 +- doc/code/IPSpace.en.rst | 2 + doc/code/Lexicon.en.rst | 78 +++++++++++++++ unit_tests/ex_Lexicon.cc | 250 +++++++++++++--------------------------------- 5 files changed, 150 insertions(+), 199 deletions(-) diff --git a/doc/Doxyfile b/doc/Doxyfile index f17a3e9..0ad9afb 100644 --- a/doc/Doxyfile +++ b/doc/Doxyfile @@ -790,7 +790,7 @@ WARN_LOGFILE = # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. -INPUT = "../swoc++/src" "../swoc++/include/swoc" +INPUT = "../code/src" "../code/include/swoc" # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -2109,12 +2109,6 @@ EXTERNAL_GROUPS = YES EXTERNAL_PAGES = YES -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of 'which perl'). -# The default file (with absolute path) is: /usr/bin/perl. - -PERL_PATH = /usr/bin/perl - #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- @@ -2128,15 +2122,6 @@ PERL_PATH = /usr/bin/perl CLASS_DIAGRAMS = YES -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see: -# http://www.mcternan.me.uk/mscgen/)) to produce the chart and insert it in the -# documentation. The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. - -MSCGEN_PATH = - # You can include diagrams made with dia in doxygen documentation. 
Doxygen will # then run dia to produce the diagram and insert it in the documentation. The # DIA_PATH tag allows you to specify the directory where the dia binary resides. diff --git a/doc/code/BW_Format.en.rst b/doc/code/BW_Format.en.rst index 1f7bbf0..67218ed 100644 --- a/doc/code/BW_Format.en.rst +++ b/doc/code/BW_Format.en.rst @@ -347,7 +347,7 @@ width <bwf::Spec::_min>` in order to disable any framework alignment operation. It is important to note a formatter can call another formatter. For example, the formatter for :code:`std::string` looks like -.. literalinclude:: ../../swoc++/include/swoc/bwf_base.h +.. literalinclude:: ../../code/include/swoc/bwf_base.h :lines: 811-833 The code first copies the format specification and forces a leading radix. Next it does special diff --git a/doc/code/IPSpace.en.rst b/doc/code/IPSpace.en.rst index 9d3e318..ae301a5 100644 --- a/doc/code/IPSpace.en.rst +++ b/doc/code/IPSpace.en.rst @@ -83,6 +83,8 @@ properly formatted, otherwise the range will be default constructed to an invali also the :libswoc:`swoc::IPRange::load` method which returns a :code:`bool` to indicate if the parsing was successful. +.. _ip-space: + IPSpace ======= diff --git a/doc/code/Lexicon.en.rst b/doc/code/Lexicon.en.rst index a554a2d..b960d04 100644 --- a/doc/code/Lexicon.en.rst +++ b/doc/code/Lexicon.en.rst @@ -44,11 +44,89 @@ Usage Lexicons can be used in a dynamic or static fashion. The basic use is as a static translation object that converts between an enumeration and names. The constructors allow setting up the entire Lexicon. +The primary things to set up for a Lexicon are + +* The equivalence of names and values. +* The default (if any) for a name. +* The default (if any) for a value. + +Values and names can be associated either using pairs of values and names, or a pair of a value +and a list of names, the first of which is the primary name. 
This must be consistent for all of the defined values, so if one value has multiple names, all values must use the value, name list form. In addition, defaults can be specified. Because all possible defaults have distinct signatures there is no need to order them - the constructor can deduce what is meant. Defaults are very handy when using a Lexicon for parsing - the default value can be an invalid value, in which case checking an input token for being a valid name is very simple :: extern swoc::Lexicon<Types> lex; // Initialized elsewhere. auto value = lex[token]; if (value != INVALID) { // handle successful parse } Lexicon can also be used dynamically where the contents are built up over time or due to run time inputs. One example is using Lexicon to support enumeration or flag set columns for :ref:`ip-space`. A configuration file can list the allowed / supported keys for the columns, which are then loaded into a Lexicon and used to parse the data file. The key methods are * :libswoc:`Lexicon::define` which adds a value, name definition. * :libswoc:`Lexicon::set_default` which sets a default. Each Lexicon has its own internal storage where copies of all of the strings are kept. This makes dynamic use much easier and robust as there are no lifetime concerns with the strings. Lexicons can be used for "normalizing" pointers to strings. Double indexing will convert the arbitrary pointer to the string to a consistent pointer, which can then be numerically compared for equivalence. This is only a benefit if the pointer is to be stored and compared multiple times. :: token = lex[lex[token]]; // Normalize string pointer. Examples ======== For illustrative purposes, consider using :ref:`ip-space` where each address has a set of flags representing the type of address, such as production, edge, secure, etc. This is stored in memory as a ``std::bitset``.
To load up the data a comma separated value file is provided which has the first column as the IP address range and the subsequent values are flag names. The starting point is an enumeration with the address types: .. literalinclude:: ../../unit_tests/ex_Lexicon.cc :start-after: doc.1.begin :end-before: doc.1.end To do conversions a Lexicon is created: .. literalinclude:: ../../unit_tests/ex_Lexicon.cc :start-after: doc.2.begin :end-before: doc.2.end The file loading and parsing is then: .. literalinclude:: ../../unit_tests/ex_Lexicon.cc :start-after: doc.load.begin :end-before: doc.load.end This uses the Lexicon to convert the strings in the file to the enumeration values, which are the bitset indices. The default is set to ``INVALID`` so that any string that doesn't match a string in the Lexicon is mapped to ``INVALID``. Once the IP Space is loaded, lookup is simple, given an address: .. literalinclude:: ../../unit_tests/ex_Lexicon.cc :start-after: doc.lookup.begin :end-before: doc.lookup.end At this point ``flags`` has the set of flags stored for that address from the original data. Data can be accessed like :: if (flags[NetType::PROD]) { ... } Design Notes ************ Lexicon was designed to solve a common problem I had with converting between enumerations and strings. Simple arrays, as noted in the introduction, were not adequate, particularly for parsing. There was also some influence from internationalization efforts where the Lexicon could be loaded with other languages. Secondary names have proven useful for parsing, allowing easy aliases for the enumeration (e.g., for ``true`` for a boolean the names can be a list like "yes", "1", "enable", etc.) diff --git a/unit_tests/ex_Lexicon.cc b/unit_tests/ex_Lexicon.cc index e2df5cc..7a53b12 100644 --- a/unit_tests/ex_Lexicon.cc +++ b/unit_tests/ex_Lexicon.cc @@ -2,199 +2,85 @@ // Copyright Verizon Media 2020 /** @file - Lexicon unit tests.
+ Lexicon example code. */ +#include <bitset> + #include "swoc/Lexicon.h" +#include "swoc/swoc_file.h" +#include "swoc/swoc_ip.h" + #include "catch.hpp" // Example code for documentation // --- -enum class Example { INVALID, Value_0, Value_1, Value_2, Value_3 }; - -using ExampleNames = swoc::Lexicon<Example>; - -namespace -{ -[[maybe_unused]] ExampleNames Static_Names { - {Example::Value_0, {"zero", "0"}}, {Example::Value_1, {"one", "1"}}, {Example::Value_2, {"two", "2"}}, - {Example::Value_3, {"three", "3"}}, - { - Example::INVALID, { "INVALID" } - } +// This is the set of address flags +// doc.1.begin +enum class NetType { + EXTERNAL = 0, // 0x1 + PROD, // 0x2 + SECURE, // 0x4 + EDGE, // 0x8 + INVALID }; -} - -TEST_CASE("Lexicon Example", "[libts][Lexicon]") -{ - ExampleNames exnames{{Example::Value_0, {"zero", "0"}}, - {Example::Value_1, {"one", "1"}}, - {Example::Value_2, {"two", "2"}}, - {Example::Value_3, {"three", "3"}}, - {Example::INVALID, {"INVALID"}}}; - - ExampleNames exnames2{{Example::Value_0, "zero"}, - {Example::Value_1, "one"}, - {Example::Value_2, "two"}, - {Example::Value_3, "three"}, - {Example::INVALID, "INVALID"}}; - - // Check constructing with just defaults.
- ExampleNames def_names_1 { Example::INVALID }; - ExampleNames def_names_2 { "INVALID" }; - ExampleNames def_names_3 { Example::INVALID, "INVALID" }; - - exnames.set_default(Example::INVALID).set_default("INVALID"); - - REQUIRE(exnames[Example::INVALID] == "INVALID"); - REQUIRE(exnames[Example::Value_0] == "zero"); - REQUIRE(exnames["zero"] == Example::Value_0); - REQUIRE(exnames["Zero"] == Example::Value_0); - REQUIRE(exnames["ZERO"] == Example::Value_0); - REQUIRE(exnames["one"] == Example::Value_1); - REQUIRE(exnames["1"] == Example::Value_1); - REQUIRE(exnames["Evil Dave"] == Example::INVALID); - REQUIRE(exnames[static_cast<Example>(0xBADD00D)] == "INVALID"); - REQUIRE(exnames[exnames[static_cast<Example>(0xBADD00D)]] == Example::INVALID); - - REQUIRE(def_names_1["zero"] == Example::INVALID); - REQUIRE(def_names_2[Example::Value_0] == "INVALID"); - REQUIRE(def_names_3["zero"] == Example::INVALID); - REQUIRE(def_names_3[Example::Value_0] == "INVALID"); - - enum class Radio { INVALID, ALPHA, BRAVO, CHARLIE, DELTA }; - using Lex = swoc::Lexicon<Radio>; - Lex lex({{Radio::INVALID, {"Invalid"}}, - {Radio::ALPHA, {"Alpha"}}, - {Radio::BRAVO, {"Bravo", "Beta"}}, - {Radio::CHARLIE, {"Charlie"}}, - {Radio::DELTA, {"Delta"}}}); - - // test structured binding for iteration. - for ([[maybe_unused]] auto const &[key, name] : lex) { - } -}; - -// --- -// End example code. 
- -enum Values { NoValue, LowValue, HighValue, Priceless }; -enum Hex { A, B, C, D, E, F, INVALID }; - -using ValueLexicon = swoc::Lexicon<Values>; -using HexLexicon = swoc::Lexicon<Hex>; - -TEST_CASE("Lexicon Constructor", "[libts][Lexicon]") -{ - // Construct with a secondary name for NoValue - ValueLexicon vl{{NoValue, {"NoValue", "garbage"}}, {LowValue, {"LowValue"}}}; - - REQUIRE("LowValue" == vl[LowValue]); // Primary name - REQUIRE(NoValue == vl["NoValue"]); // Primary name - REQUIRE(NoValue == vl["garbage"]); // Secondary name - REQUIRE_THROWS_AS(vl["monkeys"], std::domain_error); // No default, so throw. - vl.set_default(NoValue); // Put in a default. - REQUIRE(NoValue == vl["monkeys"]); // Returns default instead of throw - REQUIRE(LowValue == vl["lowVALUE"]); // Check case insensitivity. - - REQUIRE(NoValue == vl["HighValue"]); // Not defined yet. - vl.define(HighValue, {"HighValue", "High_Value"}); // Add it. - REQUIRE(HighValue == vl["HighValue"]); // Verify it's there and is case insensitive. - REQUIRE(HighValue == vl["highVALUE"]); - REQUIRE(HighValue == vl["HIGH_VALUE"]); - REQUIRE("HighValue" == vl[HighValue]); // Verify value -> primary name. - - // A few more checks on primary/secondary. - REQUIRE(NoValue == vl["Priceless"]); - REQUIRE(NoValue == vl["unique"]); - vl.define(Priceless, "Priceless", "Unique"); - REQUIRE("Priceless" == vl[Priceless]); - REQUIRE(Priceless == vl["unique"]); - - // Check default handlers. 
- using LL = swoc::Lexicon<Hex>; - bool bad_value_p = false; - LL ll_1({{A, "A"}, {B, "B"}, {C, "C"}, {E, "E"}}); - ll_1.set_default([&bad_value_p](std::string_view name) mutable -> Hex { - bad_value_p = true; - return INVALID; - }); - ll_1.set_default([&bad_value_p](Hex value) mutable -> std::string_view { - bad_value_p = true; - return "INVALID"; - }); - REQUIRE(bad_value_p == false); - REQUIRE(INVALID == ll_1["F"]); - REQUIRE(bad_value_p == true); - bad_value_p = false; - REQUIRE("INVALID" == ll_1[F]); - REQUIRE(bad_value_p == true); - bad_value_p = false; - // Verify that INVALID / "INVALID" are equal because of the default handlers. - REQUIRE("INVALID" == ll_1[INVALID]); - REQUIRE(INVALID == ll_1["INVALID"]); - REQUIRE(bad_value_p == true); - // Define the value/name, verify the handlers are *not* invoked. - ll_1.define(INVALID, "INVALID"); - bad_value_p = false; - REQUIRE("INVALID" == ll_1[INVALID]); - REQUIRE(INVALID == ll_1["INVALID"]); - REQUIRE(bad_value_p == false); - - ll_1.define({D, "D"}); // Pair style - ll_1.define({F, {"F", "0xf"}}); // Definition style - REQUIRE(ll_1[D] == "D"); - REQUIRE(ll_1["0XF"] == F); - - // iteration - std::bitset<INVALID + 1> mark; - for (auto [value, name] : ll_1) { - if (mark[value]) { - std::cerr << "Lexicon: " << name << ':' << value << " double iterated" << std::endl; - mark.reset(); - break; +// doc.1.end + +// The number of distinct flags. +static constexpr size_t N_TYPES = size_t(NetType::INVALID); + +// Set up a Lexicon to convert between the enumeration and strings. +// doc.2.begin +swoc::Lexicon<NetType> NetTypeNames { {{NetType::EXTERNAL, "external"}, + {NetType::PROD, "prod"}, + {NetType::SECURE, "secure"}, + {NetType::EDGE, "edge"}}, + NetType::INVALID // default value for undefined name + }; +// doc.2.end + +// A bit set for the flags. +using Flags = std::bitset<N_TYPES>; + +TEST_CASE("Lexicon Example", "[libts][Lexicon]") { + swoc::IPSpace<Flags> space; // Space in which to store the flags. 
+ // Load the file contents + swoc::TextView text { R"( + 10.0.0.2-10.0.0.254,edge + 10.12.0.0/25,prod + 10.15.0.10-10.15.0.99,prod,secure + 172.16.0.0/22,external,secure + 192.168.17.0/23,external,prod + )" }; + // doc.load.begin + // Process all the lines in the file. + while (text) { + auto line = text.take_prefix_at('\n').trim_if(&isspace); + auto addr_token = line.take_prefix_at(','); // first token is the range. + swoc::IPRange r{addr_token}; + if (!r.empty()) { // empty means failed parse. + Flags flags; + while (line) { // parse out the rest of the comma separated elements + auto token = line.take_prefix_at(','); + auto idx = NetTypeNames[token]; + if (idx != NetType::INVALID) { // one of the valid strings + flags.set(idx); // set the bit + } + } + space.mark(r, flags); // store the flags in the space. } } - REQUIRE(mark.all()); - - ValueLexicon v2(std::move(vl)); - REQUIRE(vl.count() == 0); - - REQUIRE("LowValue" == v2[LowValue]); // Primary name - REQUIRE(NoValue == v2["NoValue"]); // Primary name - REQUIRE(NoValue == v2["garbage"]); // Secondary name + // doc.load.end - REQUIRE(HighValue == v2["highVALUE"]); - REQUIRE(HighValue == v2["HIGH_VALUE"]); - REQUIRE("HighValue" == v2[HighValue]); // Verify value -> primary name. + using AddrCase = std::tuple<swoc::IPAddr, Flags>; + std::array<AddrCase, 5> AddrList = { {"10.0.0.6", 0x8} , {"172.19.20.31", 0x5}, {"192.168.18.19", 3} , {"10.15.0.57", 0x6}, {"10.12.0.126", 0x2}}; - // A few more checks on primary/secondary. - REQUIRE("Priceless" == v2[Priceless]); - REQUIRE(Priceless == v2["unique"]); - -}; - -TEST_CASE("Lexicon Constructor 2", "[libts][Lexicon]") -{ - // Check the various construction cases - // No defaults, value default, name default, both, both the other way.
- const HexLexicon v1({{A, {"A", "ten"}}, {B, {"B", "eleven"}}}); - - const HexLexicon v2({{A, {"A", "ten"}}, {B, {"B", "eleven"}}}, INVALID); - - const HexLexicon v3({{A, {"A", "ten"}}, {B, {"B", "eleven"}}}, "Invalid"); - - const HexLexicon v4({{A, {"A", "ten"}}, {B, {"B", "eleven"}}}, "Invalid", INVALID); - - const HexLexicon v5{{{A, {"A", "ten"}}, {B, {"B", "eleven"}}}, INVALID, "Invalid"}; - - REQUIRE(v1["a"] == A); - REQUIRE(v2["q"] == INVALID); - REQUIRE(v3[C] == "Invalid"); - REQUIRE(v4["q"] == INVALID); - REQUIRE(v4[C] == "Invalid"); - REQUIRE(v5["q"] == INVALID); - REQUIRE(v5[C] == "Invalid"); + for ( auto const& [ addr, bits ] : AddrList ) { + // doc.lookup.begin + auto && [ range, flags ] = space.find(addr); + // doc.lookup.end + REQUIRE(flags == bits); + } }
