http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/fd280b5c/thirdparty/rapidjson-1.1.0/doc/pointer.zh-cn.md ---------------------------------------------------------------------- diff --git a/thirdparty/rapidjson-1.1.0/doc/pointer.zh-cn.md b/thirdparty/rapidjson-1.1.0/doc/pointer.zh-cn.md new file mode 100644 index 0000000..f58f55f --- /dev/null +++ b/thirdparty/rapidjson-1.1.0/doc/pointer.zh-cn.md @@ -0,0 +1,234 @@ +# Pointer + +ï¼æ¬åè½äº v1.1.0 åå¸ï¼ + +JSON Pointer æ¯ä¸ä¸ªæ ååï¼[RFC6901]ï¼çæ¹å¼å»éåä¸ä¸ª JSON Documentï¼DOMï¼ä¸çå¼ãè¿ç±»ä¼¼äº XML ç XPathãç¶èï¼JSON Pointer ç®åå¾å¤ï¼èä¸æ¯ä¸ª JSON Pointer ä» æåå个å¼ã + +ä½¿ç¨ RapidJSON ç JSON Pointer å®ç°è½ç®åä¸äº DOM çæä½ã + +[TOC] + +# JSON Pointer {#JsonPointer} + +ä¸ä¸ª JSON Pointer ç±ä¸ä¸²ï¼é¶è³å¤ä¸ªï¼token æç»æï¼æ¯ä¸ª token é½æ `/` åç¼ãæ¯ä¸ª token å¯ä»¥æ¯ä¸ä¸ªå符串ææ°åãä¾å¦ï¼ç»å®ä¸ä¸ª JSONï¼ +~~~javascript +{ + "foo" : ["bar", "baz"], + "pi" : 3.1416 +} +~~~ + +以ä¸ç JSON Pointer 解æä¸ºï¼ + +1. `"/foo"` â `[ "bar", "baz" ]` +2. `"/foo/0"` â `"bar"` +3. `"/foo/1"` â `"baz"` +4. `"/pi"` â `3.1416` + +è¦æ³¨æï¼ä¸ä¸ªç©º JSON Pointer `""` ï¼é¶ä¸ª tokenï¼è§£æ为æ´ä¸ª JSONã + +# åºæ¬ä½¿ç¨æ¹æ³ {#BasicUsage} + +以ä¸ç代ç èä¾ä¸è§£èªæã + +~~~cpp +#include "rapidjson/pointer.h" + +// ... 
+Document d; + +// ä½¿ç¨ Set() å建 DOM +Pointer("/project").Set(d, "RapidJSON"); +Pointer("/stars").Set(d, 10); + +// { "project" : "RapidJSON", "stars" : 10 } + +// ä½¿ç¨ Get() è®¿é® DOMãè¥è¯¥å¼ä¸åå¨åè¿å nullptrã +if (Value* stars = Pointer("/stars").Get(d)) + stars->SetInt(stars->GetInt() + 1); + +// { "project" : "RapidJSON", "stars" : 11 } + +// Set() å Create() èªå¨çæç¶å¼ï¼å¦æå®ä»¬ä¸åå¨ï¼ã +Pointer("/a/b/0").Create(d); + +// { "project" : "RapidJSON", "stars" : 11, "a" : { "b" : [ null ] } } + +// GetWithDefault() è¿åå¼ç¨ãè¥è¯¥å¼ä¸åå¨åä¼æ·±æ·è´ç¼ºçå¼ã +Value& hello = Pointer("/hello").GetWithDefault(d, "world"); + +// { "project" : "RapidJSON", "stars" : 11, "a" : { "b" : [ null ] }, "hello" : "world" } + +// Swap() å Set() ç¸ä¼¼ +Value x("C++"); +Pointer("/hello").Swap(d, x); + +// { "project" : "RapidJSON", "stars" : 11, "a" : { "b" : [ null ] }, "hello" : "C++" } +// x åæ "world" + +// å å»ä¸ä¸ªæåæå ç´ ï¼è¥å¼åå¨è¿å true +bool success = Pointer("/a").Erase(d); +assert(success); + +// { "project" : "RapidJSON", "stars" : 10 } +~~~ + +# è¾ å©å½æ° {#HelperFunctions} + +ç±äºé¢å对象çè°ç¨ä¹ æ¯å¯è½ä¸ç¬¦ç´è§ï¼RapidJSON ä¹æä¾äºä¸äºè¾ å©å½æ°ï¼å®ä»¬ææåå½æ°å è£ æèªç±å½æ°ã + +以ä¸çä¾åä¸ä¸é¢ä¾åæåçäºæ å®å ¨ç¸åã + +~~~cpp +Document d; + +SetValueByPointer(d, "/project", "RapidJSON"); +SetValueByPointer(d, "/stars", 10); + +if (Value* stars = GetValueByPointer(d, "/stars")) + stars->SetInt(stars->GetInt() + 1); + +CreateValueByPointer(d, "/a/b/0"); + +Value& hello = GetValueByPointerWithDefault(d, "/hello", "world"); + +Value x("C++"); +SwapValueByPointer(d, "/hello", x); + +bool success = EraseValueByPointer(d, "/a"); +assert(success); +~~~ + +以ä¸å¯¹æ¯ 3 ç§è°ç¨æ¹å¼ï¼ + +1. `Pointer(source).<Method>(root, ...)` +2. `<Method>ValueByPointer(root, Pointer(source), ...)` +3. 
`<Method>ValueByPointer(root, source, ...)` + +# 解æ Pointer {#ResolvingPointer} + +`Pointer::Get()` æ `GetValueByPointer()` å½æ°å¹¶ä¸ä¿®æ¹ DOMãè¥é£äº token ä¸è½å¹é DOM éçå¼ï¼è¿äºå½æ°ä¾¿è¿å `nullptr`ã使ç¨è å¯å©ç¨è¿ä¸ªæ¹æ³æ¥æ£æ¥ä¸ä¸ªå¼æ¯å¦åå¨ã + +注æï¼æ°å¼ token å¯è¡¨ç¤ºæ°ç»ç´¢å¼ææåååã解æè¿ç¨ä¸ä¼æå¼çç±»åæ¥å¹é ã + +~~~javascript +{ + "0" : 123, + "1" : [456] +} +~~~ + +1. `"/0"` â `123` +2. `"/1/0"` â `456` + +Token `"0"` å¨ç¬¬ä¸ä¸ª pointer ä¸è¢«å½ä½æåååãå®å¨ç¬¬äºä¸ª pointer ä¸è¢«å½ä½ææ°ç»ç´¢å¼ã + +å ¶ä»å½æ°ä¼æ¹å DOMï¼å æ¬ `Create()`ã`GetWithDefault()`ã`Set()`ã`Swap()`ãè¿äºå½æ°æ»æ¯æåçãè¥ä¸äºç¶å¼ä¸åå¨ï¼å°±ä¼å建å®ä»¬ãè¥ç¶å¼ç±»åä¸å¹é tokenï¼ä¹ä¼å¼ºè¡æ¹åå ¶ç±»åãæ¹åç±»åä¹æå³çå®å ¨ç§»é¤å ¶ DOM åæ çå 容ã + +ä¾å¦ï¼æä¸é¢ç JSON 解è¯è³ `d` ä¹åï¼ + +~~~cpp +SetValueByPointer(d, "1/a", 789); // { "0" : 123, "1" : { "a" : 789 } } +~~~ + +## 解æè´å· token + +å¦å¤ï¼[RFC6901] å®ä¹äºä¸ä¸ªç¹æ® token `-` ï¼å个è´å·ï¼ï¼ç¨äºè¡¨ç¤ºæ°ç»æåå ç´ çä¸ä¸ä¸ªå ç´ ã `Get()` åªä¼ææ¤ token å½ä½æååå '"-"'ãèå ¶ä»å½æ°åä¼ä»¥æ¤è§£ææ°ç»ï¼çåäºå¯¹æ°ç»è°ç¨ `Value::PushBack()` ã + +~~~cpp +Document d; +d.Parse("{\"foo\":[123]}"); +SetValueByPointer(d, "/foo/-", 456); // { "foo" : [123, 456] } +SetValueByPointer(d, "/-", 789); // { "foo" : [123, 456], "-" : 789 } +~~~ + +## 解æ Document å Value + +å½ä½¿ç¨ `p.Get(root)` æ `GetValueByPointer(root, p)`ï¼`root` æ¯ä¸ä¸ªï¼å¸¸æ°ï¼ `Value&`ãè¿æå³çï¼å®ä¹å¯ä»¥æ¯ DOM éçä¸ä¸ªåæ ã + +å ¶ä»å½æ°æ两ç»ç¾åãä¸ç»ä½¿ç¨ `Document& document` ä½ä¸ºåæ°ï¼å¦ä¸ç»ä½¿ç¨ `Value& root`ã第ä¸ç»ä½¿ç¨ `document.GetAllocator()` å»å建å¼ï¼è第äºç»åéè¦ä½¿ç¨è æä¾ä¸ä¸ª allocatorï¼å¦å DOM éçå½æ°ã + +以ä¸ä¾åé½ä¸éè¦ allocator åæ°ï¼å 为å®ç第ä¸ä¸ªåæ°æ¯ `Document&`ãä½å¦æä½ éè¦å¯¹ä¸ä¸ªåæ è¿è¡è§£æï¼å°±éè¦å¦ä¸é¢çä¾åè¬æä¾ allocatorï¼ + +~~~cpp +class Person { +public: + Person() { + document_ = new Document(); + // CreateValueByPointer() here no need allocator + SetLocation(CreateValueByPointer(*document_, "/residence"), ...); + SetLocation(CreateValueByPointer(*document_, "/office"), ...); + }; + +private: + void 
SetLocation(Value& location, const char* country, const char* addresses[2]) { + Value::Allocator& a = document_->GetAllocator(); + // SetValueByPointer() here need allocator + SetValueByPointer(location, "/country", country, a); + SetValueByPointer(location, "/address/0", address[0], a); + SetValueByPointer(location, "/address/1", address[1], a); + } + + // ... + + Document* document_; +}; +~~~ + +`Erase()` æ `EraseValueByPointer()` ä¸éè¦ allocatorãèä¸å®ä»¬æåå é¤å¼ä¹åä¼è¿å `true`ã + +# é误å¤ç {#ErrorHandling} + +`Pointer` å¨å ¶å»ºæå½æ°éä¼è§£è¯æºå符串ãè¥æ解æé误ï¼`Pointer::IsValid()` è¿å `false`ãä½ å¯ä½¿ç¨ `Pointer::GetParseErrorCode()` å `GetParseErrorOffset()` å»è·åéä¿¡æ¯ã + +è¦æ³¨æçæ¯ï¼ææ解æå½æ°é½å设 pointer æ¯åæ³çã对ä¸ä¸ªéæ³ pointer 解æä¼åææè¨å¤±è´¥ã + +# URI ç段表示æ¹å¼ {#URIFragment} + +é¤äºæ们ä¸ç´å¨ä½¿ç¨çå符串æ¹å¼è¡¨ç¤º JSON pointerï¼[RFC6901] ä¹å®ä¹äºä¸ä¸ª JSON Pointer ç URI ç段ï¼fragmentï¼è¡¨ç¤ºæ¹å¼ãURI ç段æ¯å®ä¹äº [RFC3986] "Uniform Resource Identifier (URI): Generic Syntax"ã + +URI ç段ç主è¦åå«æ¯å¿ ç¶ä»¥ `#` ï¼pound signï¼å¼å¤´ï¼èä¸äºå符ä¹ä¼ä»¥ç¾åæ¯ç¼ç æ UTF-8 åºåãä¾å¦ï¼ä»¥ä¸ç表å±ç¤ºäºä¸å表示æ³ä¸ç C/C++ å符串常æ°ã + +å符串表示æ¹å¼ | URI ç段表示æ¹å¼ | Pointer Tokens ï¼UTF-8ï¼ +----------------------|-----------------------------|------------------------ +`"/foo/0"` | `"#/foo/0"` | `{"foo", 0}` +`"/a~1b"` | `"#/a~1b"` | `{"a/b"}` +`"/m~0n"` | `"#/m~0n"` | `{"m~n"}` +`"/ "` | `"#/%20"` | `{" "}` +`"/\0"` | `"#/%00"` | `{"\0"}` +`"/â¬"` | `"#/%E2%82%AC"` | `{"â¬"}` + +RapidJSON å®å ¨æ¯æ URI ç段表示æ¹å¼ãå®å¨è§£è¯æ¶ä¼èªå¨æ£æµ `#` å·ã + +# å符串å + +ä½ ä¹å¯ä»¥æä¸ä¸ª `Pointer` å符串åï¼å¨åäºå符串æå ¶ä»è¾åºæµãä¾å¦ï¼ + +~~~ +Pointer p(...); +StringBuffer sb; +p.Stringify(sb); +std::cout << sb.GetString() << std::endl; +~~~ + +ä½¿ç¨ `StringifyUriFragment()` å¯ä»¥æ pointer å符串å为 URI ç段表示æ³ã + +# 使ç¨è æä¾ç tokens {#UserSuppliedTokens} + +è¥ä¸ä¸ª pointer ä¼ç¨äºå¤æ¬¡è§£æï¼å®åºè¯¥åªè¢«å建ä¸æ¬¡ï¼ç¶ååæ½äºä¸åç DOM ï¼æå¨ä¸åæ¶é´å解æãè¿æ ·å¯ä»¥é¿å å¤æ¬¡åé® `Pointer`ï¼èçæ¶é´åå ååé ã + +æ们çè³å¯ä»¥åæ´è¿ä¸æ¥ï¼å®å 
¨æ¶å»è§£æè¿ç¨åå¨æå ååé ãæ们å¯ä»¥ç´æ¥çæ token æ°ç»ï¼ + +~~~cpp +#define NAME(s) { s, sizeof(s) / sizeof(s[0]) - 1, kPointerInvalidIndex } +#define INDEX(i) { #i, sizeof(#i) - 1, i } + +static const Pointer::Token kTokens[] = { NAME("foo"), INDEX(123) }; +static const Pointer p(kTokens, sizeof(kTokens) / sizeof(kTokens[0])); +// Equivalent to static const Pointer p("/foo/123"); +~~~ + +è¿ç§åæ³å¯è½éåå ååéçç³»ç»ã + +[RFC3986]: https://tools.ietf.org/html/rfc3986 +[RFC6901]: https://tools.ietf.org/html/rfc6901
http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/fd280b5c/thirdparty/rapidjson-1.1.0/doc/sax.md ---------------------------------------------------------------------- diff --git a/thirdparty/rapidjson-1.1.0/doc/sax.md b/thirdparty/rapidjson-1.1.0/doc/sax.md new file mode 100644 index 0000000..1d4fc2a --- /dev/null +++ b/thirdparty/rapidjson-1.1.0/doc/sax.md @@ -0,0 +1,486 @@ +# SAX + +The term "SAX" originated from [Simple API for XML](http://en.wikipedia.org/wiki/Simple_API_for_XML). We borrowed this term for JSON parsing and generation. + +In RapidJSON, `Reader` (typedef of `GenericReader<...>`) is the SAX-style parser for JSON, and `Writer` (typedef of `GenericWriter<...>`) is the SAX-style generator for JSON. + +[TOC] + +# Reader {#Reader} + +`Reader` parses a JSON from a stream. While it reads characters from the stream, it analyzes the characters according to the syntax of JSON, and publishes events to a handler. + +For example, here is a JSON. + +~~~~~~~~~~js +{ + "hello": "world", + "t": true , + "f": false, + "n": null, + "i": 123, + "pi": 3.1416, + "a": [1, 2, 3, 4] +} +~~~~~~~~~~ + +While a `Reader` parses this JSON, it publishes the following events to the handler sequentially: + +~~~~~~~~~~ +StartObject() +Key("hello", 5, true) +String("world", 5, true) +Key("t", 1, true) +Bool(true) +Key("f", 1, true) +Bool(false) +Key("n", 1, true) +Null() +Key("i", 1, true) +Uint(123) +Key("pi", 2, true) +Double(3.1416) +Key("a", 1, true) +StartArray() +Uint(1) +Uint(2) +Uint(3) +Uint(4) +EndArray(4) +EndObject(7) +~~~~~~~~~~ + +These events can be easily matched with the JSON, except that some event parameters need further explanation. 
Let's see the `simplereader` example which produces exactly the same output as above: + +~~~~~~~~~~cpp +#include "rapidjson/reader.h" +#include <iostream> + +using namespace rapidjson; +using namespace std; + +struct MyHandler : public BaseReaderHandler<UTF8<>, MyHandler> { + bool Null() { cout << "Null()" << endl; return true; } + bool Bool(bool b) { cout << "Bool(" << boolalpha << b << ")" << endl; return true; } + bool Int(int i) { cout << "Int(" << i << ")" << endl; return true; } + bool Uint(unsigned u) { cout << "Uint(" << u << ")" << endl; return true; } + bool Int64(int64_t i) { cout << "Int64(" << i << ")" << endl; return true; } + bool Uint64(uint64_t u) { cout << "Uint64(" << u << ")" << endl; return true; } + bool Double(double d) { cout << "Double(" << d << ")" << endl; return true; } + bool String(const char* str, SizeType length, bool copy) { + cout << "String(" << str << ", " << length << ", " << boolalpha << copy << ")" << endl; + return true; + } + bool StartObject() { cout << "StartObject()" << endl; return true; } + bool Key(const char* str, SizeType length, bool copy) { + cout << "Key(" << str << ", " << length << ", " << boolalpha << copy << ")" << endl; + return true; + } + bool EndObject(SizeType memberCount) { cout << "EndObject(" << memberCount << ")" << endl; return true; } + bool StartArray() { cout << "StartArray()" << endl; return true; } + bool EndArray(SizeType elementCount) { cout << "EndArray(" << elementCount << ")" << endl; return true; } +}; + +void main() { + const char json[] = " { \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3, 4] } "; + + MyHandler handler; + Reader reader; + StringStream ss(json); + reader.Parse(ss, handler); +} +~~~~~~~~~~ + +Note that, RapidJSON uses template to statically bind the `Reader` type and the handler type, instead of using class with virtual functions. This paradigm can improve the performance by inlining functions. 
+ +## Handler {#Handler} + +As the previous example showed, the user needs to implement a handler, which consumes the events (function calls) from `Reader`. The handler must contain the following member functions. + +~~~~~~~~~~cpp +class Handler { + bool Null(); + bool Bool(bool b); + bool Int(int i); + bool Uint(unsigned i); + bool Int64(int64_t i); + bool Uint64(uint64_t i); + bool Double(double d); + bool RawNumber(const Ch* str, SizeType length, bool copy); + bool String(const Ch* str, SizeType length, bool copy); + bool StartObject(); + bool Key(const Ch* str, SizeType length, bool copy); + bool EndObject(SizeType memberCount); + bool StartArray(); + bool EndArray(SizeType elementCount); +}; +~~~~~~~~~~ + +`Null()` is called when the `Reader` encounters a JSON null value. + +`Bool(bool)` is called when the `Reader` encounters a JSON true or false value. + +When the `Reader` encounters a JSON number, it chooses a suitable C++ type mapping. And then it calls *one* function out of `Int(int)`, `Uint(unsigned)`, `Int64(int64_t)`, `Uint64(uint64_t)` and `Double(double)`. If `kParseNumbersAsStringsFlag` is enabled, `Reader` will always call `RawNumber()` instead. + +`String(const char* str, SizeType length, bool copy)` is called when the `Reader` encounters a string. The first parameter is a pointer to the string. The second parameter is the length of the string (excluding the null terminator). Note that RapidJSON supports the null character `'\0'` inside a string. If such a situation happens, `strlen(str) < length`. The last parameter, `copy`, indicates whether the handler needs to make a copy of the string. For normal parsing, `copy = true`. Only when *insitu* parsing is used, `copy = false`. Also beware that the character type depends on the target encoding, which will be explained later. + +When the `Reader` encounters the beginning of an object, it calls `StartObject()`. An object in JSON is a set of name-value pairs. 
If the object contains members, it first calls `Key()` for the name of the member, and then calls functions depending on the type of the value. These calls for name-value pairs repeat until it calls `EndObject(SizeType memberCount)`. Note that the `memberCount` parameter is just an aid for the handler; the user may not need this parameter. + +Array is similar to object but simpler. At the beginning of an array, the `Reader` calls `StartArray()`. If there are elements, it calls functions according to the types of the elements. Similarly, in the last call `EndArray(SizeType elementCount)`, the parameter `elementCount` is just an aid for the handler. + +Every handler function returns a `bool`. Normally it should return `true`. If the handler encounters an error, it can return `false` to notify the event publisher to stop further processing. + +For example, when we parse a JSON with `Reader` and the handler detects that the JSON does not conform to the required schema, then the handler can return `false` and let the `Reader` stop further parsing. And the `Reader` will be in an error state with error code `kParseErrorTermination`. + +## GenericReader {#GenericReader} + +As mentioned before, `Reader` is a typedef of a template class `GenericReader`: + +~~~~~~~~~~cpp +namespace rapidjson { + +template <typename SourceEncoding, typename TargetEncoding, typename Allocator = MemoryPoolAllocator<> > +class GenericReader { + // ... +}; + +typedef GenericReader<UTF8<>, UTF8<> > Reader; + +} // namespace rapidjson +~~~~~~~~~~ + +The `Reader` uses UTF-8 as both source and target encoding. The source encoding means the encoding in the JSON stream. The target encoding means the encoding of the `str` parameter in `String()` calls. For example, to parse a UTF-8 stream and output UTF-16 string events, you can define a reader by: + +~~~~~~~~~~cpp +GenericReader<UTF8<>, UTF16<> > reader; +~~~~~~~~~~ + +Note that the default character type of `UTF16` is `wchar_t`. 
So this `reader` needs to call `String(const wchar_t*, SizeType, bool)` of the handler. + +The third template parameter `Allocator` is the allocator type for the internal data structure (actually a stack). + +## Parsing {#SaxParsing} + +The one and only function of `Reader` is to parse JSON. + +~~~~~~~~~~cpp +template <unsigned parseFlags, typename InputStream, typename Handler> +bool Parse(InputStream& is, Handler& handler); + +// with parseFlags = kDefaultParseFlags +template <typename InputStream, typename Handler> +bool Parse(InputStream& is, Handler& handler); +~~~~~~~~~~ + +If an error occurs during parsing, it will return `false`. The user can also call `bool HasParseError()`, `ParseErrorCode GetParseErrorCode()` and `size_t GetErrorOffset()` to obtain the error states. Actually `Document` uses these `Reader` functions to obtain parse errors. Please refer to [DOM](doc/dom.md) for details about parse errors. + +# Writer {#Writer} + +`Reader` converts (parses) JSON into events. `Writer` does exactly the opposite. It converts events into JSON. + +`Writer` is very easy to use. If your application only needs to convert some data into JSON, it may be a good choice to use `Writer` directly, instead of building a `Document` and then stringifying it with a `Writer`. + +In the `simplewriter` example, we do exactly the reverse of `simplereader`. 
+ +~~~~~~~~~~cpp +#include "rapidjson/writer.h" +#include "rapidjson/stringbuffer.h" +#include <iostream> + +using namespace rapidjson; +using namespace std; + +void main() { + StringBuffer s; + Writer<StringBuffer> writer(s); + + writer.StartObject(); + writer.Key("hello"); + writer.String("world"); + writer.Key("t"); + writer.Bool(true); + writer.Key("f"); + writer.Bool(false); + writer.Key("n"); + writer.Null(); + writer.Key("i"); + writer.Uint(123); + writer.Key("pi"); + writer.Double(3.1416); + writer.Key("a"); + writer.StartArray(); + for (unsigned i = 0; i < 4; i++) + writer.Uint(i); + writer.EndArray(); + writer.EndObject(); + + cout << s.GetString() << endl; +} +~~~~~~~~~~ + +~~~~~~~~~~ +{"hello":"world","t":true,"f":false,"n":null,"i":123,"pi":3.1416,"a":[0,1,2,3]} +~~~~~~~~~~ + +There are two `String()` and `Key()` overloads. One is the same as defined in handler concept with 3 parameters. It can handle string with null characters. Another one is the simpler version used in the above example. + +Note that, the example code does not pass any parameters in `EndArray()` and `EndObject()`. An `SizeType` can be passed but it will be simply ignored by `Writer`. + +You may doubt that, why not just using `sprintf()` or `std::stringstream` to build a JSON? + +There are various reasons: +1. `Writer` must output a well-formed JSON. If there is incorrect event sequence (e.g. `Int()` just after `StartObject()`), it generates assertion fail in debug mode. +2. `Writer::String()` can handle string escaping (e.g. converting code point `U+000A` to `\n`) and Unicode transcoding. +3. `Writer` handles number output consistently. +4. `Writer` implements the event handler concept. It can be used to handle events from `Reader`, `Document` or other event publisher. +5. `Writer` can be optimized for different platforms. + +Anyway, using `Writer` API is even simpler than generating a JSON by ad hoc methods. 
+ +## Template {#WriterTemplate} + +`Writer` has a minor design difference to `Reader`. `Writer` is a template class, not a typedef. There is no `GenericWriter`. The following is the declaration. + +~~~~~~~~~~cpp +namespace rapidjson { + +template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename Allocator = CrtAllocator, unsigned writeFlags = kWriteDefaultFlags> +class Writer { +public: + Writer(OutputStream& os, Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth); +// ... +}; + +} // namespace rapidjson +~~~~~~~~~~ + +The `OutputStream` template parameter is the type of output stream. It cannot be deduced and must be specified by the user. + +The `SourceEncoding` template parameter specifies the encoding to be used in `String(const Ch*, ...)`. + +The `TargetEncoding` template parameter specifies the encoding in the output stream. + +The `Allocator` is the type of allocator, which is used for allocating the internal data structure (a stack). + +The `writeFlags` are a combination of the following bit-flags: + +Write flags | Meaning +------------------------------|----------------------------------- +`kWriteNoFlags` | No flag is set. +`kWriteDefaultFlags` | Default write flags. It is equal to macro `RAPIDJSON_WRITE_DEFAULT_FLAGS`, which is defined as `kWriteNoFlags`. +`kWriteValidateEncodingFlag` | Validate encoding of JSON strings. +`kWriteNanAndInfFlag` | Allow writing of `Infinity`, `-Infinity` and `NaN`. + +Besides, the constructor of `Writer` has a `levelDepth` parameter. This parameter affects the initial memory allocated for storing information per hierarchy level. + +## PrettyWriter {#PrettyWriter} + +While the output of `Writer` is the most condensed JSON without white-spaces, suitable for network transfer or storage, it is not easily readable by humans. + +Therefore, RapidJSON provides a `PrettyWriter`, which adds indentation and line feeds in the output. 
+ +The usage of `PrettyWriter` is exactly the same as `Writer`, except that `PrettyWriter` provides a `SetIndent(Ch indentChar, unsigned indentCharCount)` function. The default is 4 spaces. + +## Completeness and Reset {#CompletenessReset} + +A `Writer` can only output a single JSON, which can be any JSON type at the root. Once the singular event for the root (e.g. `String()`), or the last matching `EndObject()` or `EndArray()` event, is handled, the output JSON is well-formed and complete. The user can detect this state by calling `Writer::IsComplete()`. + +When a JSON is complete, the `Writer` cannot accept any new events. Otherwise the output will be invalid (i.e. having more than one root). To reuse the `Writer` object, the user can call `Writer::Reset(OutputStream& os)` to reset all internal states of the `Writer` with a new output stream. + +# Techniques {#SaxTechniques} + +## Parsing JSON to Custom Data Structure {#CustomDataStructure} + +`Document`'s parsing capability is completely based on `Reader`. Actually `Document` is a handler which receives events from a reader to build a DOM during parsing. + +Users may use `Reader` to build other data structures directly. This eliminates building of the DOM, thus reducing memory and improving performance. + +In the following `messagereader` example, `ParseMessages()` parses a JSON which should be an object with key-string pairs. 
+ +~~~~~~~~~~cpp +#include "rapidjson/reader.h" +#include "rapidjson/error/en.h" +#include <iostream> +#include <string> +#include <map> + +using namespace std; +using namespace rapidjson; + +typedef map<string, string> MessageMap; + +struct MessageHandler + : public BaseReaderHandler<UTF8<>, MessageHandler> { + MessageHandler() : state_(kExpectObjectStart) { + } + + bool StartObject() { + switch (state_) { + case kExpectObjectStart: + state_ = kExpectNameOrObjectEnd; + return true; + default: + return false; + } + } + + bool String(const char* str, SizeType length, bool) { + switch (state_) { + case kExpectNameOrObjectEnd: + name_ = string(str, length); + state_ = kExpectValue; + return true; + case kExpectValue: + messages_.insert(MessageMap::value_type(name_, string(str, length))); + state_ = kExpectNameOrObjectEnd; + return true; + default: + return false; + } + } + + bool EndObject(SizeType) { return state_ == kExpectNameOrObjectEnd; } + + bool Default() { return false; } // All other events are invalid. + + MessageMap messages_; + enum State { + kExpectObjectStart, + kExpectNameOrObjectEnd, + kExpectValue, + }state_; + std::string name_; +}; + +void ParseMessages(const char* json, MessageMap& messages) { + Reader reader; + MessageHandler handler; + StringStream ss(json); + if (reader.Parse(ss, handler)) + messages.swap(handler.messages_); // Only change it if success. 
+ else { + ParseErrorCode e = reader.GetParseErrorCode(); + size_t o = reader.GetErrorOffset(); + cout << "Error: " << GetParseError_En(e) << endl;; + cout << " at offset " << o << " near '" << string(json).substr(o, 10) << "...'" << endl; + } +} + +int main() { + MessageMap messages; + + const char* json1 = "{ \"greeting\" : \"Hello!\", \"farewell\" : \"bye-bye!\" }"; + cout << json1 << endl; + ParseMessages(json1, messages); + + for (MessageMap::const_iterator itr = messages.begin(); itr != messages.end(); ++itr) + cout << itr->first << ": " << itr->second << endl; + + cout << endl << "Parse a JSON with invalid schema." << endl; + const char* json2 = "{ \"greeting\" : \"Hello!\", \"farewell\" : \"bye-bye!\", \"foo\" : {} }"; + cout << json2 << endl; + ParseMessages(json2, messages); + + return 0; +} +~~~~~~~~~~ + +~~~~~~~~~~ +{ "greeting" : "Hello!", "farewell" : "bye-bye!" } +farewell: bye-bye! +greeting: Hello! + +Parse a JSON with invalid schema. +{ "greeting" : "Hello!", "farewell" : "bye-bye!", "foo" : {} } +Error: Terminate parsing due to Handler error. + at offset 59 near '} }...' +~~~~~~~~~~ + +The first JSON (`json1`) was successfully parsed into `MessageMap`. Since `MessageMap` is a `std::map`, the printing order are sorted by the key. This order is different from the JSON's order. + +In the second JSON (`json2`), `foo`'s value is an empty object. As it is an object, `MessageHandler::StartObject()` will be called. However, at that moment `state_ = kExpectValue`, so that function returns `false` and cause the parsing process be terminated. The error code is `kParseErrorTermination`. + +## Filtering of JSON {#Filtering} + +As mentioned earlier, `Writer` can handle the events published by `Reader`. `condense` example simply set a `Writer` as handler of a `Reader`, so it can remove all white-spaces in JSON. `pretty` example uses the same relationship, but replacing `Writer` by `PrettyWriter`. 
So `pretty` can be used to reformat a JSON with indentation and line feed. + +Actually, we can add intermediate layer(s) to filter the contents of JSON via these SAX-style API. For example, `capitalize` example capitalize all strings in a JSON. + +~~~~~~~~~~cpp +#include "rapidjson/reader.h" +#include "rapidjson/writer.h" +#include "rapidjson/filereadstream.h" +#include "rapidjson/filewritestream.h" +#include "rapidjson/error/en.h" +#include <vector> +#include <cctype> + +using namespace rapidjson; + +template<typename OutputHandler> +struct CapitalizeFilter { + CapitalizeFilter(OutputHandler& out) : out_(out), buffer_() { + } + + bool Null() { return out_.Null(); } + bool Bool(bool b) { return out_.Bool(b); } + bool Int(int i) { return out_.Int(i); } + bool Uint(unsigned u) { return out_.Uint(u); } + bool Int64(int64_t i) { return out_.Int64(i); } + bool Uint64(uint64_t u) { return out_.Uint64(u); } + bool Double(double d) { return out_.Double(d); } + bool RawNumber(const char* str, SizeType length, bool copy) { return out_.RawNumber(str, length, copy); } + bool String(const char* str, SizeType length, bool) { + buffer_.clear(); + for (SizeType i = 0; i < length; i++) + buffer_.push_back(std::toupper(str[i])); + return out_.String(&buffer_.front(), length, true); // true = output handler need to copy the string + } + bool StartObject() { return out_.StartObject(); } + bool Key(const char* str, SizeType length, bool copy) { return String(str, length, copy); } + bool EndObject(SizeType memberCount) { return out_.EndObject(memberCount); } + bool StartArray() { return out_.StartArray(); } + bool EndArray(SizeType elementCount) { return out_.EndArray(elementCount); } + + OutputHandler& out_; + std::vector<char> buffer_; +}; + +int main(int, char*[]) { + // Prepare JSON reader and input stream. + Reader reader; + char readBuffer[65536]; + FileReadStream is(stdin, readBuffer, sizeof(readBuffer)); + + // Prepare JSON writer and output stream. 
+ char writeBuffer[65536]; + FileWriteStream os(stdout, writeBuffer, sizeof(writeBuffer)); + Writer<FileWriteStream> writer(os); + + // JSON reader parse from the input stream and let writer generate the output. + CapitalizeFilter<Writer<FileWriteStream> > filter(writer); + if (!reader.Parse(is, filter)) { + fprintf(stderr, "\nError(%u): %s\n", (unsigned)reader.GetErrorOffset(), GetParseError_En(reader.GetParseErrorCode())); + return 1; + } + + return 0; +} +~~~~~~~~~~ + +Note that, it is incorrect to simply capitalize the JSON as a string. For example: +~~~~~~~~~~ +["Hello\nWorld"] +~~~~~~~~~~ + +Simply capitalizing the whole JSON would contain incorrect escape character: +~~~~~~~~~~ +["HELLO\NWORLD"] +~~~~~~~~~~ + +The correct result by `capitalize`: +~~~~~~~~~~ +["HELLO\nWORLD"] +~~~~~~~~~~ + +More complicated filters can be developed. However, since SAX-style API can only provide information about a single event at a time, user may need to book-keeping the contextual information (e.g. the path from root value, storage of other related values). Some processing may be easier to be implemented in DOM than SAX. 
http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/fd280b5c/thirdparty/rapidjson-1.1.0/doc/sax.zh-cn.md ---------------------------------------------------------------------- diff --git a/thirdparty/rapidjson-1.1.0/doc/sax.zh-cn.md b/thirdparty/rapidjson-1.1.0/doc/sax.zh-cn.md new file mode 100644 index 0000000..b20286d --- /dev/null +++ b/thirdparty/rapidjson-1.1.0/doc/sax.zh-cn.md @@ -0,0 +1,487 @@ +# SAX + +"SAX" æ¤æ¯è¯æºäº [Simple API for XML](http://en.wikipedia.org/wiki/Simple_API_for_XML)ãæ们åäºæ¤æ¯è¯å»å¥ç¨å¨ JSON ç解æåçæã + +å¨ RapidJSON ä¸ï¼`Reader`ï¼`GenericReader<...>` ç typedefï¼æ¯ JSON ç SAX é£æ ¼è§£æå¨ï¼è `Writer`ï¼`GenericWriter<...>` ç typedefï¼åæ¯ JSON ç SAX é£æ ¼çæå¨ã + +[TOC] + +# Reader {#Reader} + +`Reader` ä»è¾å ¥æµè§£æä¸ä¸ª JSONãå½å®ä»æµä¸è¯»åå符æ¶ï¼å®ä¼åºäº JSON çè¯æ³å»åæå符ï¼å¹¶åå¤çå¨åéäºä»¶ã + +ä¾å¦ï¼ä»¥ä¸æ¯ä¸ä¸ª JSONã + +~~~~~~~~~~js +{ + "hello": "world", + "t": true , + "f": false, + "n": null, + "i": 123, + "pi": 3.1416, + "a": [1, 2, 3, 4] +} +~~~~~~~~~~ + +å½ä¸ä¸ª `Reader` 解ææ¤ JSON æ¶ï¼å®ä¼é¡ºåºå°åå¤çå¨åé以ä¸çäºä»¶ï¼ + +~~~~~~~~~~ +StartObject() +Key("hello", 5, true) +String("world", 5, true) +Key("t", 1, true) +Bool(true) +Key("f", 1, true) +Bool(false) +Key("n", 1, true) +Null() +Key("i") +UInt(123) +Key("pi") +Double(3.1416) +Key("a") +StartArray() +Uint(1) +Uint(2) +Uint(3) +Uint(4) +EndArray(4) +EndObject(7) +~~~~~~~~~~ + +é¤äºä¸äºäºä»¶åæ°éè¦åä½è§£éï¼è¿äºäºä»¶å¯ä»¥è½»æ¾å°ä¸ JSON 对ä¸ãæ们å¯ä»¥çç `simplereader` ä¾åææ ·äº§çå以ä¸å®å ¨ç¸åçç»æï¼ + +~~~~~~~~~~cpp +#include "rapidjson/reader.h" +#include <iostream> + +using namespace rapidjson; +using namespace std; + +struct MyHandler : public BaseReaderHandler<UTF8<>, MyHandler> { + bool Null() { cout << "Null()" << endl; return true; } + bool Bool(bool b) { cout << "Bool(" << boolalpha << b << ")" << endl; return true; } + bool Int(int i) { cout << "Int(" << i << ")" << endl; return true; } + bool Uint(unsigned u) { cout << "Uint(" << u << ")" << endl; return true; } + bool 
Int64(int64_t i) { cout << "Int64(" << i << ")" << endl; return true; } + bool Uint64(uint64_t u) { cout << "Uint64(" << u << ")" << endl; return true; } + bool Double(double d) { cout << "Double(" << d << ")" << endl; return true; } + bool String(const char* str, SizeType length, bool copy) { + cout << "String(" << str << ", " << length << ", " << boolalpha << copy << ")" << endl; + return true; + } + bool StartObject() { cout << "StartObject()" << endl; return true; } + bool Key(const char* str, SizeType length, bool copy) { + cout << "Key(" << str << ", " << length << ", " << boolalpha << copy << ")" << endl; + return true; + } + bool EndObject(SizeType memberCount) { cout << "EndObject(" << memberCount << ")" << endl; return true; } + bool StartArray() { cout << "StartArray()" << endl; return true; } + bool EndArray(SizeType elementCount) { cout << "EndArray(" << elementCount << ")" << endl; return true; } +}; + +void main() { + const char json[] = " { \"hello\" : \"world\", \"t\" : true , \"f\" : false, \"n\": null, \"i\":123, \"pi\": 3.1416, \"a\":[1, 2, 3, 4] } "; + + MyHandler handler; + Reader reader; + StringStream ss(json); + reader.Parse(ss, handler); +} +~~~~~~~~~~ + +注æ RapidJSON 使ç¨æ¨¡æ¿å»éææ·å® `Reader` ç±»ååå¤çå¨ç类形ï¼èä¸æ¯ä½¿ç¨å«èå½æ°çç±»ãè¿ä¸ªèå¼å¯ä»¥éè¿æå½æ°å èèæ¹åæ§è½ã + +## å¤çå¨ {#Handler} + +å¦åä¾æ示ï¼ä½¿ç¨è éè¦å®ç°ä¸ä¸ªå¤çå¨ï¼handlerï¼ï¼ç¨äºå¤çæ¥èª `Reader` çäºä»¶ï¼å½æ°è°ç¨ï¼ãå¤çå¨å¿ é¡»å å«ä»¥ä¸çæåå½æ°ã + +~~~~~~~~~~cpp +class Handler { + bool Null(); + bool Bool(bool b); + bool Int(int i); + bool Uint(unsigned i); + bool Int64(int64_t i); + bool Uint64(uint64_t i); + bool Double(double d); + bool RawNumber(const Ch* str, SizeType length, bool copy); + bool String(const Ch* str, SizeType length, bool copy); + bool StartObject(); + bool Key(const Ch* str, SizeType length, bool copy); + bool EndObject(SizeType memberCount); + bool StartArray(); + bool EndArray(SizeType elementCount); +}; +~~~~~~~~~~ + +å½ `Reader` éå° JSON null å¼æ¶ä¼è°ç¨ 
`Null()`ã + +å½ `Reader` éå° JSON true æ false å¼æ¶ä¼è°ç¨ `Bool(bool)`ã + +å½ `Reader` éå° JSON numberï¼å®ä¼éæ©ä¸ä¸ªåéç C++ ç±»åæ å°ï¼ç¶åè°ç¨ `Int(int)`ã`Uint(unsigned)`ã`Int64(int64_t)`ã`Uint64(uint64_t)` å `Double(double)` ç * å ¶ä¸ä¹ä¸ä¸ª *ã è¥å¼å¯äº `kParseNumbersAsStrings` é项ï¼`Reader` 便ä¼æ¹ä¸ºè°ç¨ `RawNumber()`ã + +å½ `Reader` éå° JSON stringï¼å®ä¼è°ç¨ `String(const char* str, SizeType length, bool copy)`ã第ä¸ä¸ªåæ°æ¯å符串çæéã第äºä¸ªåæ°æ¯å符串çé¿åº¦ï¼ä¸å å«ç©ºç»æ¢ç¬¦å·ï¼ã注æ RapidJSON æ¯æå串ä¸å«æ空å符 `'\0'`ãè¥åºç°è¿ç§æ åµï¼ä¾¿ä¼æ `strlen(str) < length`ãæåç `copy` åæ°è¡¨ç¤ºå¤çå¨æ¯å¦éè¦å¤å¶è¯¥å符串ãå¨æ£å¸¸è§£ææ¶ï¼`copy = true`ãä» å½ä½¿ç¨åä½è§£ææ¶ï¼`copy = false`ãæ¤å¤ï¼è¿è¦æ³¨æå符çç±»åä¸ç®æ ç¼ç ç¸å ³ï¼æ们ç¨åä¼åè°è¿ä¸ç¹ã + +å½ `Reader` éå° JSON object çå¼å§ä¹æ¶ï¼å®ä¼è°ç¨ `StartObject()`ãJSON ç object æ¯ä¸ä¸ªé®å¼å¯¹ï¼æåï¼çéåãè¥ object å å«æåï¼å®ä¼å 为æåçååè°ç¨ `Key()`ï¼ç¶ååæå¼çç±»åè°ç¨å½æ°ãå®ä¸æè°ç¨è¿äºé®å¼å¯¹ï¼ç´è³æç»è°ç¨ `EndObject(SizeType memberCount)`ã注æ `memberCount` åæ°å¯¹å¤çå¨æ¥è¯´åªæ¯åå©æ§è´¨ï¼ä½¿ç¨è å¯è½ä¸éè¦æ¤åæ°ã + +JSON array ä¸ object ç¸ä¼¼ï¼ä½æ´ç®åãå¨ array å¼å§æ¶ï¼`Reader` ä¼è°ç¨ `BeginArary()`ãè¥ array å«æå ç´ ï¼å®ä¼æå ç´ çç±»åæ¥è¯»ç¨å½æ°ãç¸ä¼¼å°ï¼æåå®ä¼è°ç¨ `EndArray(SizeType elementCount)`ï¼å ¶ä¸ `elementCount` åæ°å¯¹å¤çå¨æ¥è¯´åªæ¯åå©æ§è´¨ã + +æ¯ä¸ªå¤çå¨å½æ°é½è¿åä¸ä¸ª `bool`ãæ£å¸¸å®ä»¬åºè¿å `true`ãè¥å¤çå¨éå°é误ï¼å®å¯ä»¥è¿å `false` å»éç¥äºä»¶åéæ¹åæ¢ç»§ç»å¤çã + +ä¾å¦ï¼å½æä»¬ç¨ `Reader` 解æä¸ä¸ª JSON æ¶ï¼å¤çå¨æ£æµå°è¯¥ JSON 并ä¸ç¬¦åæéç schemaï¼é£ä¹å¤çå¨å¯ä»¥è¿å `false`ï¼ä»¤ `Reader` åæ¢ä¹åç解æå·¥ä½ãè `Reader` ä¼è¿å ¥ä¸ä¸ªé误ç¶æï¼å¹¶ä»¥ `kParseErrorTermination` é误ç æ è¯ã + +## GenericReader {#GenericReader} + +åé¢æåï¼`Reader` æ¯ `GenericReader` 模æ¿ç±»ç typedefï¼ + +~~~~~~~~~~cpp +namespace rapidjson { + +template <typename SourceEncoding, typename TargetEncoding, typename Allocator = MemoryPoolAllocator<> > +class GenericReader { + // ... 
+}; + +typedef GenericReader<UTF8<>, UTF8<> > Reader; + +} // namespace rapidjson +~~~~~~~~~~ + +`Reader` ä½¿ç¨ UTF-8 ä½ä¸ºæ¥æºåç®æ ç¼ç ãæ¥æºç¼ç æ¯æ JSON æµçç¼ç ãç®æ ç¼ç æ¯æ `String()` ç `str` åæ°æç¨çç¼ç ãä¾å¦ï¼è¦è§£æä¸ä¸ª UTF-8 æµå¹¶è¾åºè³ UTF-16 string äºä»¶ï¼ä½ éè¦è¿ä¹å®ä¹ä¸ä¸ª readerï¼ + +~~~~~~~~~~cpp +GenericReader<UTF8<>, UTF16<> > reader; +~~~~~~~~~~ + +注æå° `UTF16` ç缺çç±»åæ¯ `wchar_t`ãå æ¤è¿ä¸ª `reader` éè¦è°ç¨å¤çå¨ç `String(const wchar_t*, SizeType, bool)`ã + +第ä¸ä¸ªæ¨¡æ¿åæ° `Allocator` æ¯å é¨æ°æ®ç»æï¼å®é ä¸æ¯ä¸ä¸ªå æ ï¼çåé å¨ç±»åã + +## 解æ {#SaxParsing} + +`Reader` çå¯ä¸åè½å°±æ¯è§£æ JSONã + +~~~~~~~~~~cpp +template <unsigned parseFlags, typename InputStream, typename Handler> +bool Parse(InputStream& is, Handler& handler); + +// ä½¿ç¨ parseFlags = kDefaultParseFlags +template <typename InputStream, typename Handler> +bool Parse(InputStream& is, Handler& handler); +~~~~~~~~~~ + +è¥å¨è§£æä¸åºç°é误ï¼å®ä¼è¿å `false`ã使ç¨è å¯è°ç¨ `bool HasParseEror()`, `ParseErrorCode GetParseErrorCode()` å `size_t GetErrorOffset()` è·åé误ç¶æãå®é ä¸ `Document` 使ç¨è¿äº `Reader` å½æ°å»è·å解æé误ã请åè [DOM](doc/dom.zh-cn.md) å»äºè§£æå ³è§£æé误çç»èã + +# Writer {#Writer} + +`Reader` æ JSON 转æ¢ï¼è§£æï¼æ为äºä»¶ã`Writer` åå®å ¨ç¸åçäºæ ãå®æäºä»¶è½¬æ¢æ JSONã + +`Writer` æ¯é常容æ使ç¨çãè¥ä½ çåºç¨ç¨åºåªéæä¸äºæ°æ®è½¬æ¢æ JSONï¼å¯è½ç´æ¥ä½¿ç¨ `Writer`ï¼ä¼æ¯å»ºç«ä¸ä¸ª `Document` ç¶åç¨ `Writer` æå®è½¬æ¢æ JSON æ´å æ¹ä¾¿ã + +å¨ `simplewriter` ä¾åéï¼æ们å `simplereader` å®å ¨ç¸åçäºæ ã + +~~~~~~~~~~cpp +#include "rapidjson/writer.h" +#include "rapidjson/stringbuffer.h" +#include <iostream> + +using namespace rapidjson; +using namespace std; + +void main() { + StringBuffer s; + Writer<StringBuffer> writer(s); + + writer.StartObject(); + writer.Key("hello"); + writer.String("world"); + writer.Key("t"); + writer.Bool(true); + writer.Key("f"); + writer.Bool(false); + writer.Key("n"); + writer.Null(); + writer.Key("i"); + writer.Uint(123); + writer.Key("pi"); + writer.Double(3.1416); + writer.Key("a"); + 
writer.StartArray(); + for (unsigned i = 0; i < 4; i++) + writer.Uint(i); + writer.EndArray(); + writer.EndObject(); + + cout << s.GetString() << endl; +} +~~~~~~~~~~ + +~~~~~~~~~~ +{"hello":"world","t":true,"f":false,"n":null,"i":123,"pi":3.1416,"a":[0,1,2,3]} +~~~~~~~~~~ + +`String()` å `Key()` åæ两个éè½½ãä¸ä¸ªæ¯å¦å¤çå¨ concept è¬ï¼æ 3 个åæ°ãå®è½å¤çå«ç©ºå符çå符串ãå¦ä¸ä¸ªæ¯å¦ä¸ä¸ä½¿ç¨çè¾ç®åçæ¬ã + +注æå°ï¼ä¾å代ç ä¸ç `EndArray()` å `EndObject()` 并没æåæ°ãå¯ä»¥ä¼ éä¸ä¸ª `SizeType` çåæ°ï¼ä½å®ä¼è¢« `Writer` 忽ç¥ã + +ä½ å¯è½ä¼æçï¼ä¸ºä»ä¹ä¸ä½¿ç¨ `sprintf()` æ `std::stringstream` å»å»ºç«ä¸ä¸ª JSONï¼ + +è¿æå 个åå ï¼ +1. `Writer` å¿ ç¶ä¼è¾åºä¸ä¸ªç»æè¯å¥½ï¼well-formedï¼ç JSONãè¥ç¶æé误çäºä»¶æ¬¡åºï¼å¦ `Int()` ç´§é `StartObject()` åºç°ï¼ï¼å®ä¼å¨è°è¯æ¨¡å¼ä¸äº§çæè¨å¤±è´¥ã +2. `Writer::String()` å¯å¤çå符串转ä¹ï¼å¦æç ç¹ `U+000A` 转æ¢æ `\n`ï¼åè¿è¡ Unicode 转ç ã +3. `Writer` ä¸è´å°å¤ç number çè¾åºã +4. `Writer` å®ç°äºäºä»¶å¤çå¨ conceptãå¯ç¨äºå¤çæ¥èª `Reader`ã`Document` æå ¶ä»äºä»¶åçå¨ã +5. `Writer` å¯å¯¹ä¸åå¹³å°è¿è¡ä¼åã + +æ 论å¦ä½ï¼ä½¿ç¨ `Writer` API å»çæ JSON çè³ä¹æ¯è¿äºä¸´æ¶æ¹æ³æ´ç®åã + +## æ¨¡æ¿ {#WriterTemplate} + +`Writer` ä¸ `Reader` æå°è®¸è®¾è®¡åºå«ã`Writer` æ¯ä¸ä¸ªæ¨¡æ¿ç±»ï¼èä¸æ¯ä¸ä¸ª typedefã 并没æ `GenericWriter`ã以ä¸æ¯ `Writer` ç声æã + +~~~~~~~~~~cpp +namespace rapidjson { + +template<typename OutputStream, typename SourceEncoding = UTF8<>, typename TargetEncoding = UTF8<>, typename Allocator = CrtAllocator<> > +class Writer { +public: + Writer(OutputStream& os, Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) +// ... 
+}; + +} // namespace rapidjson +~~~~~~~~~~ + +`OutputStream` 模æ¿åæ°æ¯è¾åºæµçç±»åãå®çç±»åä¸å¯ä»¥è¢«èªå¨æ¨æï¼å¿ é¡»ç±ä½¿ç¨è æä¾ã + +`SourceEncoding` 模æ¿åæ°æå®äº `String(const Ch*, ...)` çç¼ç ã + +`TargetEncoding` 模æ¿åæ°æå®è¾åºæµçç¼ç ã + +`Allocator` æ¯åé å¨çç±»åï¼ç¨äºåé å é¨æ°æ®ç»æï¼ä¸ä¸ªå æ ï¼ã + +`writeFlags` æ¯ä»¥ä¸ä½æ å¿çç»åï¼ + +åå ¥ä½æ å¿ | æä¹ +------------------------------|----------------------------------- +`kWriteNoFlags` | 没æä»»ä½æ å¿ã +`kWriteDefaultFlags` | 缺çç解æé项ãå®çäº `RAPIDJSON_WRITE_DEFAULT_FLAGS` å®ï¼æ¤å®å®ä¹ä¸º `kWriteNoFlags`ã +`kWriteValidateEncodingFlag` | æ ¡éª JSON å符串çç¼ç ã +`kWriteNanAndInfFlag` | 容许åå ¥ `Infinity`, `-Infinity` å `NaN`ã + +æ¤å¤ï¼`Writer` çæé å½æ°æä¸ `levelDepth` åæ°ãåå¨æ¯å±é¶ä¿¡æ¯çåå§å ååé éåæ¤åæ°å½±åã + +## PrettyWriter {#PrettyWriter} + +`Writer` æè¾åºçæ¯æ²¡æç©ºæ ¼å符çæç´§å JSONï¼éåç½ç»ä¼ è¾æå¨åï¼ä½ä¸éå人类é 读ã + +å æ¤ï¼RapidJSON æä¾äºä¸ä¸ª `PrettyWriter`ï¼å®å¨è¾åºä¸å å ¥ç¼©è¿åæ¢è¡ã + +`PrettyWriter` çç¨æ³ä¸ `Writer` å ä¹ä¸æ ·ï¼ä¸åä¹å¤æ¯ `PrettyWriter` æä¾äºä¸ä¸ª `SetIndent(Ch indentChar, unsigned indentCharCount)` å½æ°ã缺çç缩è¿æ¯ 4 ä¸ªç©ºæ ¼ã + +## å®æ´æ§åéç½® {#CompletenessReset} + +ä¸ä¸ª `Writer` åªå¯è¾åºå个 JSONï¼å ¶æ ¹èç¹å¯ä»¥æ¯ä»»ä½ JSON ç±»åãå½å¤çå®åä¸ªæ ¹èç¹äºä»¶ï¼å¦ `String()`ï¼ï¼æå¹é çæå `EndObject()` æ `EndArray()` äºä»¶ï¼è¾åºç JSON æ¯ç»æå®æ´ï¼well-formedï¼åå®æ´çã使ç¨è å¯è°ç¨ `Writer::IsComplete()` å»æ£æµå®æ´æ§ã + +å½ JSON å®æ´æ¶ï¼`Writer` ä¸è½åæ¥åæ°çäºä»¶ãä¸ç¶å ¶è¾åºä¾¿ä¼æ¯ä¸åæ³çï¼ä¾å¦æè¶ è¿ä¸ä¸ªæ ¹èç¹ï¼ã为äºéæ°å©ç¨ `Writer` 对象ï¼ä½¿ç¨è å¯è°ç¨ `Writer::Reset(OutputStream& os)` å»éç½®å ¶ææå é¨ç¶æå设置æ°çè¾åºæµã + +# æå·§ {#SaxTechniques} + +## 解æ JSON è³èªå®ä¹ç»æ {#CustomDataStructure} + +`Document` ç解æåè½å®å ¨ä¾é `Reader`ãå®é ä¸ `Document` æ¯ä¸ä¸ªå¤çå¨ï¼å¨è§£æ JSON æ¶æ¥æ¶äºä»¶å»å»ºç«ä¸ä¸ª DOMã + +使ç¨è å¯ä»¥ç´æ¥ä½¿ç¨ `Reader` å»å»ºç«å ¶ä»æ°æ®ç»æãè¿æ¶é¤äºå»ºç« DOM çæ¥éª¤ï¼ä»èåå°äºå åå¼é并æ¹åæ§è½ã + +å¨ä»¥ä¸ç `messagereader` ä¾åä¸ï¼`ParseMessages()` 解æä¸ä¸ª JSONï¼è¯¥ JSON åºè¯¥æ¯ä¸ä¸ªå«é®å¼å¯¹ç objectã + +~~~~~~~~~~cpp 
+#include "rapidjson/reader.h" +#include "rapidjson/error/en.h" +#include <iostream> +#include <string> +#include <map> + +using namespace std; +using namespace rapidjson; + +typedef map<string, string> MessageMap; + +struct MessageHandler + : public BaseReaderHandler<UTF8<>, MessageHandler> { + MessageHandler() : state_(kExpectObjectStart) { + } + + bool StartObject() { + switch (state_) { + case kExpectObjectStart: + state_ = kExpectNameOrObjectEnd; + return true; + default: + return false; + } + } + + bool String(const char* str, SizeType length, bool) { + switch (state_) { + case kExpectNameOrObjectEnd: + name_ = string(str, length); + state_ = kExpectValue; + return true; + case kExpectValue: + messages_.insert(MessageMap::value_type(name_, string(str, length))); + state_ = kExpectNameOrObjectEnd; + return true; + default: + return false; + } + } + + bool EndObject(SizeType) { return state_ == kExpectNameOrObjectEnd; } + + bool Default() { return false; } // All other events are invalid. + + MessageMap messages_; + enum State { + kExpectObjectStart, + kExpectNameOrObjectEnd, + kExpectValue, + }state_; + std::string name_; +}; + +void ParseMessages(const char* json, MessageMap& messages) { + Reader reader; + MessageHandler handler; + StringStream ss(json); + if (reader.Parse(ss, handler)) + messages.swap(handler.messages_); // Only change it if success. 
+ else { + ParseErrorCode e = reader.GetParseErrorCode(); + size_t o = reader.GetErrorOffset(); + cout << "Error: " << GetParseError_En(e) << endl;; + cout << " at offset " << o << " near '" << string(json).substr(o, 10) << "...'" << endl; + } +} + +int main() { + MessageMap messages; + + const char* json1 = "{ \"greeting\" : \"Hello!\", \"farewell\" : \"bye-bye!\" }"; + cout << json1 << endl; + ParseMessages(json1, messages); + + for (MessageMap::const_iterator itr = messages.begin(); itr != messages.end(); ++itr) + cout << itr->first << ": " << itr->second << endl; + + cout << endl << "Parse a JSON with invalid schema." << endl; + const char* json2 = "{ \"greeting\" : \"Hello!\", \"farewell\" : \"bye-bye!\", \"foo\" : {} }"; + cout << json2 << endl; + ParseMessages(json2, messages); + + return 0; +} +~~~~~~~~~~ + +~~~~~~~~~~ +{ "greeting" : "Hello!", "farewell" : "bye-bye!" } +farewell: bye-bye! +greeting: Hello! + +Parse a JSON with invalid schema. +{ "greeting" : "Hello!", "farewell" : "bye-bye!", "foo" : {} } +Error: Terminate parsing due to Handler error. + at offset 59 near '} }...' 
+~~~~~~~~~~ + +第ä¸ä¸ª JSONï¼`json1`ï¼è¢«æåå°è§£æè³ `MessageMap`ãç±äº `MessageMap` æ¯ä¸ä¸ª `std::map`ï¼æå°æ¬¡åºæé®å¼æåºãæ¤æ¬¡åºä¸ JSON ä¸ç次åºä¸åã + +å¨ç¬¬äºä¸ª JSONï¼`json2`ï¼ä¸ï¼`foo` çå¼æ¯ä¸ä¸ªç©º objectãç±äºå®æ¯ä¸ä¸ª objectï¼`MessageHandler::StartObject()` ä¼è¢«è°ç¨ãç¶èï¼å¨ `state_ = kExpectValue` çæ åµä¸ï¼è¯¥å½æ°ä¼è¿å `false`ï¼å¹¶å¯¼è´è§£æè¿ç¨ç»æ¢ãé误代ç æ¯ `kParseErrorTermination`ã + +## è¿æ»¤ JSON {#Filtering} + +å¦åé¢æåè¿ï¼`Writer` å¯å¤ç `Reader` ååºçäºä»¶ã`example/condense/condense.cpp` ä¾åç®åå°è®¾ç½® `Writer` ä½ä¸ºä¸ä¸ª `Reader` çå¤çå¨ï¼å æ¤å®è½ç§»é¤ JSON ä¸çææ空ç½å符ã`example/pretty/pretty.cpp` ä¾å使ç¨åæ ·çå ³ç³»ï¼åªæ¯ä»¥ `PrettyWriter` å代 `Writer`ãå æ¤ `pretty` è½å¤éæ°æ ¼å¼å JSONï¼å å ¥ç¼©è¿åæ¢è¡ã + +å®é ä¸ï¼æ们å¯ä»¥ä½¿ç¨ SAX é£æ ¼ API å»å å ¥ï¼å¤ä¸ªï¼ä¸é´å±å»è¿æ»¤ JSON çå 容ãä¾å¦ `capitalize` ä¾åå¯ä»¥æææ JSON string æ¹ä¸ºå¤§åã + +~~~~~~~~~~cpp +#include "rapidjson/reader.h" +#include "rapidjson/writer.h" +#include "rapidjson/filereadstream.h" +#include "rapidjson/filewritestream.h" +#include "rapidjson/error/en.h" +#include <vector> +#include <cctype> + +using namespace rapidjson; + +template<typename OutputHandler> +struct CapitalizeFilter { + CapitalizeFilter(OutputHandler& out) : out_(out), buffer_() { + } + + bool Null() { return out_.Null(); } + bool Bool(bool b) { return out_.Bool(b); } + bool Int(int i) { return out_.Int(i); } + bool Uint(unsigned u) { return out_.Uint(u); } + bool Int64(int64_t i) { return out_.Int64(i); } + bool Uint64(uint64_t u) { return out_.Uint64(u); } + bool Double(double d) { return out_.Double(d); } + bool RawNumber(const char* str, SizeType length, bool copy) { return out_.RawNumber(str, length, copy); } + bool String(const char* str, SizeType length, bool) { + buffer_.clear(); + for (SizeType i = 0; i < length; i++) + buffer_.push_back(std::toupper(str[i])); + return out_.String(&buffer_.front(), length, true); // true = output handler need to copy the string + } + bool StartObject() { return out_.StartObject(); } + bool 
Key(const char* str, SizeType length, bool copy) { return String(str, length, copy); } + bool EndObject(SizeType memberCount) { return out_.EndObject(memberCount); } + bool StartArray() { return out_.StartArray(); } + bool EndArray(SizeType elementCount) { return out_.EndArray(elementCount); } + + OutputHandler& out_; + std::vector<char> buffer_; +}; + +int main(int, char*[]) { + // Prepare JSON reader and input stream. + Reader reader; + char readBuffer[65536]; + FileReadStream is(stdin, readBuffer, sizeof(readBuffer)); + + // Prepare JSON writer and output stream. + char writeBuffer[65536]; + FileWriteStream os(stdout, writeBuffer, sizeof(writeBuffer)); + Writer<FileWriteStream> writer(os); + + // JSON reader parse from the input stream and let writer generate the output. + CapitalizeFilter<Writer<FileWriteStream> > filter(writer); + if (!reader.Parse(is, filter)) { + fprintf(stderr, "\nError(%u): %s\n", (unsigned)reader.GetErrorOffset(), GetParseError_En(reader.GetParseErrorCode())); + return 1; + } + + return 0; +} +~~~~~~~~~~ + +注æå°ï¼ä¸å¯ç®åå°æ JSON å½ä½å符串å»æ¹ä¸ºå¤§åãä¾å¦ï¼ +~~~~~~~~~~ +["Hello\nWorld"] +~~~~~~~~~~ + +ç®åå°ææ´ä¸ª JSON 转为大åçè¯ä¼äº§çé误ç转ä¹ç¬¦ï¼ +~~~~~~~~~~ +["HELLO\NWORLD"] +~~~~~~~~~~ + +è `capitalize` å°±ä¼äº§çæ£ç¡®çç»æï¼ +~~~~~~~~~~ +["HELLO\nWORLD"] +~~~~~~~~~~ + +æ们è¿å¯ä»¥å¼åæ´å¤æçè¿æ»¤å¨ãç¶èï¼ç±äº SAX é£æ ¼ API å¨æä¸æ¶é´ç¹åªè½æä¾åä¸äºä»¶çä¿¡æ¯ï¼ä½¿ç¨è éè¦èªè¡è®°å½ä¸äºä¸ä¸æä¿¡æ¯ï¼ä¾å¦ä»æ ¹èç¹èµ·çè·¯å¾ãå¨åå ¶ä»ç¸å ³å¼ï¼ã对äºå¤çæäºæ åµï¼ç¨ DOM ä¼æ¯ SAX æ´å®¹æå®ç°ã + http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/fd280b5c/thirdparty/rapidjson-1.1.0/doc/schema.md ---------------------------------------------------------------------- diff --git a/thirdparty/rapidjson-1.1.0/doc/schema.md b/thirdparty/rapidjson-1.1.0/doc/schema.md new file mode 100644 index 0000000..a83cebc --- /dev/null +++ b/thirdparty/rapidjson-1.1.0/doc/schema.md @@ -0,0 +1,237 @@ +# Schema + +(This feature was released in v1.1.0) + +JSON Schema is a draft 
standard for describing the format of JSON data. The schema itself is also JSON data. By validating a JSON structure with JSON Schema, your code can safely access the DOM without manually checking types, or whether a key exists, etc. It can also ensure that the serialized JSON conforms to a specified schema. + +RapidJSON implemented a JSON Schema validator for [JSON Schema Draft v4](http://json-schema.org/documentation.html). If you are not familiar with JSON Schema, you may refer to [Understanding JSON Schema](http://spacetelescope.github.io/understanding-json-schema/). + +[TOC] + +## Basic Usage + +First of all, you need to parse a JSON Schema into `Document`, and then compile the `Document` into a `SchemaDocument`. + +Secondly, construct a `SchemaValidator` with the `SchemaDocument`. It is similar to a `Writer` in the sense of handling SAX events. So, you can use `document.Accept(validator)` to validate a document, and then check the validity. + +~~~cpp +#include "rapidjson/schema.h" + +// ... + +Document sd; +if (sd.Parse(schemaJson).HasParseError()) { + // the schema is not a valid JSON. + // ... +} +SchemaDocument schema(sd); // Compile a Document to SchemaDocument +// sd is no longer needed here. + +Document d; +if (d.Parse(inputJson).HasParseError()) { + // the input is not a valid JSON. + // ... +} + +SchemaValidator validator(schema); +if (!d.Accept(validator)) { + // Input JSON is invalid according to the schema + // Output diagnostic information + StringBuffer sb; + validator.GetInvalidSchemaPointer().StringifyUriFragment(sb); + printf("Invalid schema: %s\n", sb.GetString()); + printf("Invalid keyword: %s\n", validator.GetInvalidSchemaKeyword()); + sb.Clear(); + validator.GetInvalidDocumentPointer().StringifyUriFragment(sb); + printf("Invalid document: %s\n", sb.GetString()); +} +~~~ + +Some notes: + +* One `SchemaDocument` can be referenced by multiple `SchemaValidator`s. It will not be modified by `SchemaValidator`s.
+* A `SchemaValidator` may be reused to validate multiple documents. To run it for other documents, call `validator.Reset()` first. + +## Validation during parsing/serialization + +Unlike most JSON Schema validator implementations, RapidJSON provides a SAX-based schema validator. Therefore, you can parse a JSON from a stream while validating it on the fly. If the validator encounters a JSON value that violates the supplied schema, the parsing will be terminated immediately. This design is especially useful for parsing large JSON files. + +### DOM parsing + +For using DOM in parsing, `Document` needs some preparation and finalizing tasks, in addition to receiving SAX events, thus it needs some work to route the reader, validator and the document. `SchemaValidatingReader` is a helper class that does such work. + +~~~cpp +#include "rapidjson/filereadstream.h" + +// ... +SchemaDocument schema(sd); // Compile a Document to SchemaDocument + +// Use reader to parse the JSON +FILE* fp = fopen("big.json", "r"); +FileReadStream is(fp, buffer, sizeof(buffer)); + +// Parse JSON from reader, validate the SAX events, and store in d. +Document d; +SchemaValidatingReader<kParseDefaultFlags, FileReadStream, UTF8<> > reader(is, schema); +d.Populate(reader); + +if (!reader.GetParseResult()) { + // Not a valid JSON + // When reader.GetParseResult().Code() == kParseErrorTermination, + // it may be terminated by: + // (1) the validator found that the JSON is invalid according to schema; or + // (2) the input stream has I/O error.
+ + // Check the validation result + if (!reader.IsValid()) { + // Input JSON is invalid according to the schema + // Output diagnostic information + StringBuffer sb; + reader.GetInvalidSchemaPointer().StringifyUriFragment(sb); + printf("Invalid schema: %s\n", sb.GetString()); + printf("Invalid keyword: %s\n", reader.GetInvalidSchemaKeyword()); + sb.Clear(); + reader.GetInvalidDocumentPointer().StringifyUriFragment(sb); + printf("Invalid document: %s\n", sb.GetString()); + } +} +~~~ + +### SAX parsing + +For using SAX in parsing, it is much simpler. If you only need to validate the JSON without further processing, it is simply: + +~~~ +SchemaValidator validator(schema); +Reader reader; +if (!reader.Parse(stream, validator)) { + if (!validator.IsValid()) { + // ... + } +} +~~~ + +This is exactly the method used in the [schemavalidator](example/schemavalidator/schemavalidator.cpp) example. The distinct advantage is low memory usage, no matter how big the JSON is (the memory usage depends on the complexity of the schema). + +If you need to handle the SAX events further, then you need to use the template class `GenericSchemaValidator` to set the output handler of the validator: + +~~~ +MyHandler handler; +GenericSchemaValidator<SchemaDocument, MyHandler> validator(schema, handler); +Reader reader; +if (!reader.Parse(ss, validator)) { + if (!validator.IsValid()) { + // ... + } +} +~~~ + +### Serialization + +It is also possible to do validation during serializing. This can ensure the result JSON is valid according to the JSON schema. + +~~~ +StringBuffer sb; +Writer<StringBuffer> writer(sb); +GenericSchemaValidator<SchemaDocument, Writer<StringBuffer> > validator(schema, writer); +if (!d.Accept(validator)) { + // Some problem during Accept(), it may be validation or encoding issues. + if (!validator.IsValid()) { + // ... + } +} +~~~ + +Of course, if your application only needs SAX-style serialization, it can simply send SAX events to `SchemaValidator` instead of `Writer`.
+ +## Remote Schema + +JSON Schema supports the [`$ref` keyword](http://spacetelescope.github.io/understanding-json-schema/structuring.html), which is a [JSON pointer](doc/pointer.md) referencing a local or remote schema. A local pointer is prefixed with `#`, while a remote pointer is a relative or absolute URI. For example: + +~~~js +{ "$ref": "definitions.json#/address" } +~~~ + +As `SchemaDocument` does not know how to resolve such URIs, it needs a user-provided `IRemoteSchemaDocumentProvider` instance to do so. + +~~~ +class MyRemoteSchemaDocumentProvider : public IRemoteSchemaDocumentProvider { +public: + virtual const SchemaDocument* GetRemoteDocument(const char* uri, SizeType length) { + // Resolve the uri and return a pointer to that schema. + } +}; + +// ... + +MyRemoteSchemaDocumentProvider provider; +SchemaDocument schema(sd, &provider); +~~~ + +## Conformance + +RapidJSON passed 262 out of 263 tests in [JSON Schema Test Suite](https://github.com/json-schema/JSON-Schema-Test-Suite) (Json Schema draft 4). + +The failed test is "changed scope ref invalid" of "change resolution scope" in `refRemote.json`. This is because the `id` schema keyword and the URI combining function are not implemented. + +Besides, the `format` schema keyword for string values is ignored, since it is not required by the specification. + +### Regular Expression + +The schema keywords `pattern` and `patternProperties` use regular expressions to match the required pattern. + +RapidJSON implemented a simple NFA regular expression engine, which is used by default. It supports the following syntax.
+ +|Syntax|Description| +|------|-----------| +|`ab` | Concatenation | +|`a|b` | Alternation | +|`a?` | Zero or one | +|`a*` | Zero or more | +|`a+` | One or more | +|`a{3}` | Exactly 3 times | +|`a{3,}` | At least 3 times | +|`a{3,5}`| 3 to 5 times | +|`(ab)` | Grouping | +|`^a` | At the beginning | +|`a$` | At the end | +|`.` | Any character | +|`[abc]` | Character classes | +|`[a-c]` | Character class range | +|`[a-z0-9_]` | Character class combination | +|`[^abc]` | Negated character classes | +|`[^a-c]` | Negated character class range | +|`[\b]` | Backspace (U+0008) | +|`\|`, `\\`, ... | Escape characters | +|`\f` | Form feed (U+000C) | +|`\n` | Line feed (U+000A) | +|`\r` | Carriage return (U+000D) | +|`\t` | Tab (U+0009) | +|`\v` | Vertical tab (U+000B) | + +For C++11 compiler, it is also possible to use the `std::regex` by defining `RAPIDJSON_SCHEMA_USE_INTERNALREGEX=0` and `RAPIDJSON_SCHEMA_USE_STDREGEX=1`. If your schemas do not need `pattern` and `patternProperties`, you can set both macros to zero to disable this feature, which will reduce some code size. + +## Performance + +Most C++ JSON libraries do not yet support JSON Schema. So we tried to evaluate the performance of RapidJSON's JSON Schema validator according to [json-schema-benchmark](https://github.com/ebdrup/json-schema-benchmark), which tests 11 JavaScript libraries running on Node.js. + +That benchmark runs validations on [JSON Schema Test Suite](https://github.com/json-schema/JSON-Schema-Test-Suite), in which some test suites and tests are excluded. We made the same benchmarking procedure in [`schematest.cpp`](test/perftest/schematest.cpp). + +On a Mac Book Pro (2.8 GHz Intel Core i7), the following results are collected. 
+ +|Validator|Relative speed|Number of test runs per second| +|---------|:------------:|:----------------------------:| +|RapidJSON|155%|30682| +|[`ajv`](https://github.com/epoberezkin/ajv)|100%|19770 (± 1.31%)| +|[`is-my-json-valid`](https://github.com/mafintosh/is-my-json-valid)|70%|13835 (± 2.84%)| +|[`jsen`](https://github.com/bugventure/jsen)|57.7%|11411 (± 1.27%)| +|[`schemasaurus`](https://github.com/AlexeyGrishin/schemasaurus)|26%|5145 (± 1.62%)| +|[`themis`](https://github.com/playlyfe/themis)|19.9%|3935 (± 2.69%)| +|[`z-schema`](https://github.com/zaggino/z-schema)|7%|1388 (± 0.84%)| +|[`jsck`](https://github.com/pandastrike/jsck#readme)|3.1%|606 (± 2.84%)| +|[`jsonschema`](https://github.com/tdegrunt/jsonschema#readme)|0.9%|185 (± 1.01%)| +|[`skeemas`](https://github.com/Prestaul/skeemas#readme)|0.8%|154 (± 0.79%)| +|tv4|0.5%|93 (± 0.94%)| +|[`jayschema`](https://github.com/natesilva/jayschema)|0.1%|21 (± 1.14%)| + +That is, RapidJSON is about 1.5x faster than the fastest JavaScript library (ajv). And 1400x faster than the slowest one. 
http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/fd280b5c/thirdparty/rapidjson-1.1.0/doc/schema.zh-cn.md ---------------------------------------------------------------------- diff --git a/thirdparty/rapidjson-1.1.0/doc/schema.zh-cn.md b/thirdparty/rapidjson-1.1.0/doc/schema.zh-cn.md new file mode 100644 index 0000000..a01c1b1 --- /dev/null +++ b/thirdparty/rapidjson-1.1.0/doc/schema.zh-cn.md @@ -0,0 +1,237 @@ +# Schema + +ï¼æ¬åè½äº v1.1.0 åå¸ï¼ + +JSON Schema æ¯æè¿° JSON æ ¼å¼çä¸ä¸ªæ åèæ¡ãä¸ä¸ª schema æ¬èº«ä¹æ¯ä¸ä¸ª JSONãä½¿ç¨ JSON Schema å»æ ¡éª JSONï¼å¯ä»¥è®©ä½ ç代ç å®å ¨å°è®¿é® DOMï¼èæ é¡»æ£æ¥ç±»åæé®å¼æ¯å¦åå¨çãè¿ä¹è½ç¡®ä¿è¾åºç JSON æ¯ç¬¦åæå®ç schemaã + +RapidJSON å®ç°äºä¸ä¸ª [JSON Schema Draft v4](http://json-schema.org/documentation.html) çæ ¡éªå¨ãè¥ä½ ä¸çæ JSON Schemaï¼å¯ä»¥åè [Understanding JSON Schema](http://spacetelescope.github.io/understanding-json-schema/)ã + +[TOC] + +## åºæ¬ç¨æ³ + +é¦å ï¼ä½ è¦æ JSON Schema 解ææ `Document`ï¼åæå®ç¼è¯æä¸ä¸ª `SchemaDocument`ã + +ç¶åï¼å©ç¨è¯¥ `SchemaDocument` å建ä¸ä¸ª `SchemaValidator`ãå®ä¸ `Writer` ç¸ä¼¼ï¼é½æ¯è½å¤å¤ç SAX äºä»¶çãå æ¤ï¼ä½ å¯ä»¥ç¨ `document.Accept(validator)` å»æ ¡éªä¸ä¸ª JSONï¼ç¶ååè·åæ ¡éªç»æã + +~~~cpp +#include "rapidjson/schema.h" + +// ... + +Document sd; +if (!sd.Parse(schemaJson).HasParseError()) { + // æ¤ schema ä¸æ¯åæ³ç JSON + // ... +} +SchemaDocument schema(sd); // æä¸ä¸ª Document ç¼è¯è³ SchemaDocument +// ä¹åä¸åéè¦ sd + +Document d; +if (!d.Parse(inputJson).HasParseError()) { + // è¾å ¥ä¸æ¯ä¸ä¸ªåæ³ç JSON + // ... 
+} + +SchemaValidator validator(schema); +if (!d.Accept(validator)) { + // è¾å ¥ç JSON ä¸åä¹ schema + // æå°è¯æä¿¡æ¯ + StringBuffer sb; + validator.GetInvalidSchemaPointer().StringifyUriFragment(sb); + printf("Invalid schema: %s\n", sb.GetString()); + printf("Invalid keyword: %s\n", validator.GetInvalidSchemaKeyword()); + sb.Clear(); + validator.GetInvalidDocumentPointer().StringifyUriFragment(sb); + printf("Invalid document: %s\n", sb.GetString()); +} +~~~ + +ä¸äºæ³¨æç¹ï¼ + +* ä¸ä¸ª `SchemaDocment` è½è¢«å¤ä¸ª `SchemaValidator` å¼ç¨ãå®ä¸ä¼è¢« `SchemaValidator` ä¿®æ¹ã +* å¯ä»¥éå¤ä½¿ç¨ä¸ä¸ª `SchemaValidator` æ¥æ ¡éªå¤ä¸ªæ件ãå¨æ ¡éªå ¶ä»æ件åï¼é¡»å è°ç¨ `validator.Reset()`ã + +## å¨è§£æï¼çææ¶è¿è¡æ ¡éª + +ä¸å¤§é¨å JSON Schema æ ¡éªå¨ææä¸åï¼RapidJSON æä¾äºä¸ä¸ªåºäº SAX ç schema æ ¡éªå¨å®ç°ãå æ¤ï¼ä½ å¯ä»¥å¨è¾å ¥æµè§£æ JSON çåæ¶è¿è¡æ ¡éªãè¥æ ¡éªå¨éå°ä¸ä¸ªä¸ schema ä¸ç¬¦çå¼ï¼å°±ä¼ç«å³ç»æ¢è§£æãè¿è®¾è®¡å¯¹äºè§£æ大å JSON æ件æ¶ç¹å«æç¨ã + +### DOM 解æ + +å¨ä½¿ç¨ DOM è¿è¡è§£ææ¶ï¼`Document` é¤äºæ¥æ¶ SAX äºä»¶å¤ï¼è¿éåä¸äºåå¤åç»æå·¥ä½ï¼å æ¤ï¼ä¸ºäºè¿æ¥ `Reader`ã`SchemaValidator` å `Document` è¦åå¤ä¸ç¹äºæ ã`SchemaValidatingReader` æ¯ä¸ä¸ªè¾ å©ç±»å»åé£äºå·¥ä½ã + +~~~cpp +#include "rapidjson/filereadstream.h" + +// ... 
+SchemaDocument schema(sd); // æä¸ä¸ª Document ç¼è¯è³ SchemaDocument + +// ä½¿ç¨ reader 解æ JSON +FILE* fp = fopen("big.json", "r"); +FileReadStream is(fp, buffer, sizeof(buffer)); + +// ç¨ reader 解æ JSONï¼æ ¡éªå®ç SAX äºä»¶ï¼å¹¶åå¨è³ d +Document d; +SchemaValidatingReader<kParseDefaultFlags, FileReadStream, UTF8<> > reader(is, schema); +d.Populate(reader); + +if (!reader.GetParseResult()) { + // ä¸æ¯ä¸ä¸ªåæ³ç JSON + // å½ reader.GetParseResult().Code() == kParseErrorTermination, + // å®å¯è½æ¯è¢«ä»¥ä¸åå ä¸æ¢ï¼ + // (1) æ ¡éªå¨åç° JSON ä¸åä¹ schemaï¼æ + // (2) è¾å ¥æµæ I/O é误ã + + // æ£æ¥æ ¡éªç»æ + if (!reader.IsValid()) { + // è¾å ¥ç JSON ä¸åä¹ schema + // æå°è¯æä¿¡æ¯ + StringBuffer sb; + reader.GetInvalidSchemaPointer().StringifyUriFragment(sb); + printf("Invalid schema: %s\n", sb.GetString()); + printf("Invalid keyword: %s\n", reader.GetInvalidSchemaKeyword()); + sb.Clear(); + reader.GetInvalidDocumentPointer().StringifyUriFragment(sb); + printf("Invalid document: %s\n", sb.GetString()); + } +} +~~~ + +### SAX 解æ + +ä½¿ç¨ SAX 解ææ¶ï¼æ åµå°±ç®åå¾å¤ãè¥åªéè¦æ ¡éª JSON èæ éè¿ä¸æ¥å¤çï¼é£ä¹ä» éè¦ï¼ + +~~~ +SchemaValidator validator(schema); +Reader reader; +if (!reader.Parse(stream, validator)) { + if (!validator.IsValid()) { + // ... + } +} +~~~ + +è¿ç§æ¹å¼å [schemavalidator](example/schemavalidator/schemavalidator.cpp) ä¾åå®å ¨ç¸åãè¿å¸¦æ¥çç¬ç¹ä¼å¿æ¯ï¼æ 论 JSON å¤å·¨å¤§ï¼æ°¸è¿ç»´æä½å åç¨éï¼å åç¨éåªä¸ Schema çå¤æ度ç¸å ³ï¼ã + +è¥ä½ éè¦è¿ä¸æ¥å¤ç SAX äºä»¶ï¼ä¾¿å¯ä½¿ç¨æ¨¡æ¿ç±» `GenericSchemaValidator` å»è®¾ç½®æ ¡éªå¨çè¾åº `Handler`ï¼ + +~~~ +MyHandler handler; +GenericSchemaValidator<SchemaDocument, MyHandler> validator(schema, handler); +Reader reader; +if (!reader.Parse(ss, validator)) { + if (!validator.IsValid()) { + // ... 
+ } +} +~~~ + +### çæ + +æ们ä¹å¯ä»¥å¨çæï¼serializationï¼çæ¶åè¿è¡æ ¡éªãè¿è½ç¡®ä¿è¾åºç JSON 符åä¸ä¸ª JSON Schemaã + +~~~ +StringBuffer sb; +Writer<StringBuffer> writer(sb); +GenericSchemaValidator<SchemaDocument, Writer<StringBuffer> > validator(s, writer); +if (!d.Accept(validator)) { + // Some problem during Accept(), it may be validation or encoding issues. + if (!validator.IsValid()) { + // ... + } +} +~~~ + +å½ç¶ï¼å¦æä½ çåºç¨ä» éè¦ SAX é£æ ¼ççæï¼é£ä¹åªéè¦æ SAX äºä»¶ç±åæ¥åéå° `Writer`ï¼æ¹ä¸ºåéå° `SchemaValidator`ã + +## è¿ç¨ Schema + +JSON Schema æ¯æ [`$ref` å ³é®å](http://spacetelescope.github.io/understanding-json-schema/structuring.html)ï¼å®æ¯ä¸ä¸ª [JSON pointer](doc/pointer.zh-cn.md) å¼ç¨è³ä¸ä¸ªæ¬å°ï¼localï¼æè¿ç¨ï¼remoteï¼ schemaãæ¬å°æéçé¦åç¬¦æ¯ `#`ï¼èè¿ç¨æéæ¯ä¸ä¸ªç¸å¯¹æç»å¯¹ URIãä¾å¦ï¼ + +~~~js +{ "$ref": "definitions.json#/address" } +~~~ + +ç±äº `SchemaDocument` 并ä¸ç¥éå¦ä½å¤çé£äº URIï¼å®éè¦ä½¿ç¨è æä¾ä¸ä¸ª `IRemoteSchemaDocumentProvider` çå®ä¾å»å¤çã + +~~~ +class MyRemoteSchemaDocumentProvider : public IRemoteSchemaDocumentProvider { +public: + virtual const SchemaDocument* GetRemoteDocument(const char* uri, SizeTyp length) { + // Resolve the uri and returns a pointer to that schema. + } +}; + +// ... 
+ +MyRemoteSchemaDocumentProvider provider; +SchemaDocument schema(sd, &provider); +~~~ + +## æ åç符åç¨åº¦ + +RapidJSON éè¿äº [JSON Schema Test Suite](https://github.com/json-schema/JSON-Schema-Test-Suite) (Json Schema draft 4) ä¸ 263 个æµè¯ç 262 个ã + +没éè¿çæµè¯æ¯ `refRemote.json` ä¸ç "change resolution scope" - "changed scope ref invalid"ãè¿æ¯ç±äºæªå®ç° `id` schema å ³é®åå URI å并åè½ã + +é¤æ¤ä»¥å¤ï¼å ³äºå符串类åç `format` schema å ³é®åä¹ä¼è¢«å¿½ç¥ï¼å 为æ åä¸å¹¶æ²¡éæ±å¿ é¡»å®ç°ã + +### æ£åè¡¨è¾¾å¼ + +`pattern` å `patternProperties` è¿ä¸¤ä¸ª schema å ³é®å使ç¨äºæ£å表达å¼å»å¹é æéç模å¼ã + +RapidJSON å®ç°äºä¸ä¸ªç®åç NFA æ£å表达å¼å¼æï¼å¹¶é¢è®¾ä½¿ç¨ãå®æ¯æ以ä¸è¯æ³ã + +|è¯æ³|æè¿°| +|------|-----------| +|`ab` | 串è | +|`a|b` | äº¤æ¿ | +|`a?` | é¶æä¸æ¬¡ | +|`a*` | é¶æå¤æ¬¡ | +|`a+` | ä¸æå¤æ¬¡ | +|`a{3}` | å好 3 次 | +|`a{3,}` | è³å° 3 次 | +|`a{3,5}`| 3 è³ 5 次 | +|`(ab)` | åç» | +|`^a` | å¨å¼å§å¤ | +|`a$` | å¨ç»æå¤ | +|`.` | ä»»ä½å符 | +|`[abc]` | åç¬¦ç» | +|`[a-c]` | å符ç»èå´ | +|`[a-z0-9_]` | å符ç»ç»å | +|`[^abc]` | å符ç»åå | +|`[^a-c]` | å符ç»èå´åå | +|`[\b]` | éæ ¼ç¬¦ (U+0008) | +|`\|`, `\\`, ... 
| 转ä¹å符 | +|`\f` | é¦é¡µ (U+000C) | +|`\n` | é¦è¡ (U+000A) | +|`\r` | å车 (U+000D) | +|`\t` | å¶è¡¨ (U+0009) | +|`\v` | åç´å¶è¡¨ (U+000B) | + +对äºä½¿ç¨ C++11 ç¼è¯å¨ç使ç¨è ï¼ä¹å¯ä½¿ç¨ `std::regex`ï¼åªéå®ä¹ `RAPIDJSON_SCHEMA_USE_INTERNALREGEX=0` å `RAPIDJSON_SCHEMA_USE_STDREGEX=1`ãè¥ä½ ç schema æ éä½¿ç¨ `pattern` æ `patternProperties`ï¼å¯ä»¥æ两个å®é½è®¾ä¸ºé¶ï¼ä»¥ç¦ç¨æ¤åè½ï¼è¿æ ·åå¯èçä¸äºä»£ç ä½ç§¯ã + +## æ§è½ + +大é¨å C++ JSON åºé½æªæ¯æ JSON Schemaãå æ¤æ们å°è¯æç § [json-schema-benchmark](https://github.com/ebdrup/json-schema-benchmark) å»è¯ä¼° RapidJSON ç JSON Schema æ ¡éªå¨ã该è¯æµæµè¯äº 11 个è¿è¡å¨ node.js ä¸ç JavaScript åºã + +该è¯æµæ ¡éª [JSON Schema Test Suite](https://github.com/json-schema/JSON-Schema-Test-Suite) ä¸çæµè¯ï¼å½ä¸æé¤äºä¸äºæµè¯å¥ä»¶å个å«æµè¯ãæä»¬å¨ [`schematest.cpp`](test/perftest/schematest.cpp) å®ç°äºç¸åçè¯æµã + +å¨ MacBook Pro (2.8 GHz Intel Core i7) ä¸æ¶éå°ä»¥ä¸ç»æã + +|æ ¡éªå¨|ç¸å¯¹é度|æ¯ç§æ§è¡çæµè¯æ°ç®| +|---------|:------------:|:----------------------------:| +|RapidJSON|155%|30682| +|[`ajv`](https://github.com/epoberezkin/ajv)|100%|19770 (± 1.31%)| +|[`is-my-json-valid`](https://github.com/mafintosh/is-my-json-valid)|70%|13835 (± 2.84%)| +|[`jsen`](https://github.com/bugventure/jsen)|57.7%|11411 (± 1.27%)| +|[`schemasaurus`](https://github.com/AlexeyGrishin/schemasaurus)|26%|5145 (± 1.62%)| +|[`themis`](https://github.com/playlyfe/themis)|19.9%|3935 (± 2.69%)| +|[`z-schema`](https://github.com/zaggino/z-schema)|7%|1388 (± 0.84%)| +|[`jsck`](https://github.com/pandastrike/jsck#readme)|3.1%|606 (± 2.84%)| +|[`jsonschema`](https://github.com/tdegrunt/jsonschema#readme)|0.9%|185 (± 1.01%)| +|[`skeemas`](https://github.com/Prestaul/skeemas#readme)|0.8%|154 (± 0.79%)| +|tv4|0.5%|93 (± 0.94%)| +|[`jayschema`](https://github.com/natesilva/jayschema)|0.1%|21 (± 1.14%)| + +æ¢è¨ä¹ï¼RapidJSON æ¯æå¿«ç JavaScript åºï¼ajvï¼å¿«çº¦ 1.5xãæ¯ææ ¢çå¿« 1400xã http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/fd280b5c/thirdparty/rapidjson-1.1.0/doc/stream.md 
---------------------------------------------------------------------- diff --git a/thirdparty/rapidjson-1.1.0/doc/stream.md b/thirdparty/rapidjson-1.1.0/doc/stream.md new file mode 100644 index 0000000..b79ce53 --- /dev/null +++ b/thirdparty/rapidjson-1.1.0/doc/stream.md @@ -0,0 +1,426 @@ +# Stream + +In RapidJSON, `rapidjson::Stream` is a concept for reading/writing JSON. Here we first show how to use the provided streams, and then how to create a custom stream. + +[TOC] + +# Memory Streams {#MemoryStreams} + +Memory streams store JSON in memory. + +## StringStream (Input) {#StringStream} + +`StringStream` is the most basic input stream. It represents a complete, read-only JSON stored in memory. It is defined in `rapidjson/rapidjson.h`. + +~~~~~~~~~~cpp +#include "rapidjson/document.h" // will include "rapidjson/rapidjson.h" + +using namespace rapidjson; + +// ... +const char json[] = "[1, 2, 3, 4]"; +StringStream s(json); + +Document d; +d.ParseStream(s); +~~~~~~~~~~ + +Since this is very common usage, `Document::Parse(const char*)` is provided to do exactly the same as above: + +~~~~~~~~~~cpp +// ... +const char json[] = "[1, 2, 3, 4]"; +Document d; +d.Parse(json); +~~~~~~~~~~ + +Note that `StringStream` is a typedef of `GenericStringStream<UTF8<> >`; users may use other encodings to represent the character set of the stream. + +## StringBuffer (Output) {#StringBuffer} + +`StringBuffer` is a simple output stream. It allocates a memory buffer for writing the whole JSON. Use `GetString()` to obtain the buffer. + +~~~~~~~~~~cpp +#include "rapidjson/stringbuffer.h" + +StringBuffer buffer; +Writer<StringBuffer> writer(buffer); +d.Accept(writer); + +const char* output = buffer.GetString(); +~~~~~~~~~~ + +When the buffer is full, it will increase its capacity automatically. The default capacity is 256 characters (256 bytes for UTF8, 512 bytes for UTF16, etc.). Users can provide an allocator and an initial capacity.
+ +~~~~~~~~~~cpp +StringBuffer buffer1(0, 1024); // Use its allocator, initial size = 1024 +StringBuffer buffer2(allocator, 1024); +~~~~~~~~~~ + +By default, `StringBuffer` will instantiate an internal allocator. + +Similarly, `StringBuffer` is a typedef of `GenericStringBuffer<UTF8<> >`. + +# File Streams {#FileStreams} + +When parsing a JSON from file, you may read the whole JSON into memory and use ``StringStream`` above. + +However, if the JSON is big, or memory is limited, you can use `FileReadStream`. It reads only a part of the JSON from the file into a buffer, and then lets that part be parsed. If it runs out of characters in the buffer, it will read the next part from the file. + +## FileReadStream (Input) {#FileReadStream} + +`FileReadStream` reads the file via a `FILE` pointer. The user needs to provide a buffer. + +~~~~~~~~~~cpp +#include "rapidjson/filereadstream.h" +#include <cstdio> + +using namespace rapidjson; + +FILE* fp = fopen("big.json", "rb"); // non-Windows use "r" + +char readBuffer[65536]; +FileReadStream is(fp, readBuffer, sizeof(readBuffer)); + +Document d; +d.ParseStream(is); + +fclose(fp); +~~~~~~~~~~ + +Unlike string streams, `FileReadStream` is a byte stream. It does not handle encodings. If the file is not UTF-8, the byte stream can be wrapped in an `EncodedInputStream`, which is discussed below. + +Apart from reading files, users can also use `FileReadStream` to read `stdin`. + +## FileWriteStream (Output) {#FileWriteStream} + +`FileWriteStream` is a buffered output stream. Its usage is very similar to `FileReadStream`. + +~~~~~~~~~~cpp +#include "rapidjson/filewritestream.h" +#include <cstdio> + +using namespace rapidjson; + +Document d; +d.Parse(json); +// ... + +FILE* fp = fopen("output.json", "wb"); // non-Windows use "w" + +char writeBuffer[65536]; +FileWriteStream os(fp, writeBuffer, sizeof(writeBuffer)); + +Writer<FileWriteStream> writer(os); +d.Accept(writer); + +fclose(fp); +~~~~~~~~~~ + +It can also direct the output to `stdout`.
+ +# iostream Wrapper {#iostreamWrapper} + +Due to users' requests, RapidJSON provides official wrappers for `std::basic_istream` and `std::basic_ostream`. However, please note that the performance will be much lower than the other streams above. + +## IStreamWrapper {#IStreamWrapper} + +`IStreamWrapper` wraps any class derived from `std::istream`, such as `std::istringstream`, `std::stringstream`, `std::ifstream`, `std::fstream`, into RapidJSON's input stream. + +~~~cpp +#include <rapidjson/document.h> +#include <rapidjson/istreamwrapper.h> +#include <fstream> + +using namespace rapidjson; +using namespace std; + +ifstream ifs("test.json"); +IStreamWrapper isw(ifs); + +Document d; +d.ParseStream(isw); +~~~ + +For classes derived from `std::wistream`, use `WIStreamWrapper`. + +## OStreamWrapper {#OStreamWrapper} + +Similarly, `OStreamWrapper` wraps any class derived from `std::ostream`, such as `std::ostringstream`, `std::stringstream`, `std::ofstream`, `std::fstream`, into RapidJSON's output stream. + +~~~cpp +#include <rapidjson/document.h> +#include <rapidjson/ostreamwrapper.h> +#include <rapidjson/writer.h> +#include <fstream> + +using namespace rapidjson; +using namespace std; + +Document d; +d.Parse(json); + +// ... + +ofstream ofs("output.json"); +OStreamWrapper osw(ofs); + +Writer<OStreamWrapper> writer(osw); +d.Accept(writer); +~~~ + +For classes derived from `std::wostream`, use `WOStreamWrapper`. + +# Encoded Streams {#EncodedStreams} + +Encoded streams do not contain JSON themselves, but they wrap byte streams to provide basic encoding/decoding functions. + +As mentioned above, UTF-8 byte streams can be read directly. However, UTF-16 and UTF-32 have endianness issues. To handle endianness correctly, bytes must be converted into characters (e.g. `wchar_t` for UTF-16) while reading, and characters into bytes while writing. + +Besides, an encoded stream also needs to handle the [byte order mark (BOM)](http://en.wikipedia.org/wiki/Byte_order_mark).
When reading from a byte stream, the BOM must be detected, or simply consumed, if it exists. When writing to a byte stream, a BOM can optionally be written. + +If the encoding of the stream is known at compile time, you may use `EncodedInputStream` and `EncodedOutputStream`. If the stream can be UTF-8, UTF-16LE, UTF-16BE, UTF-32LE, UTF-32BE JSON, and the encoding is only known at runtime, you may use `AutoUTFInputStream` and `AutoUTFOutputStream`. These streams are defined in `rapidjson/encodedstream.h`. + +Note that these encoded streams can be applied to streams other than files. For example, a file held in memory, or a custom byte stream, can be wrapped in encoded streams. + +## EncodedInputStream {#EncodedInputStream} + +`EncodedInputStream` has two template parameters. The first one is an `Encoding` class, such as `UTF8`, `UTF16LE`, defined in `rapidjson/encodings.h`. The second one is the class of the stream to be wrapped. + +~~~~~~~~~~cpp +#include "rapidjson/document.h" +#include "rapidjson/filereadstream.h" // FileReadStream +#include "rapidjson/encodedstream.h" // EncodedInputStream +#include <cstdio> + +using namespace rapidjson; + +FILE* fp = fopen("utf16le.json", "rb"); // non-Windows use "r" + +char readBuffer[256]; +FileReadStream bis(fp, readBuffer, sizeof(readBuffer)); + +EncodedInputStream<UTF16LE<>, FileReadStream> eis(bis); // wraps bis into eis + +Document d; // Document is GenericDocument<UTF8<> > +d.ParseStream<0, UTF16LE<> >(eis); // Parses UTF-16LE file into UTF-8 in memory + +fclose(fp); +~~~~~~~~~~ + +## EncodedOutputStream {#EncodedOutputStream} + +`EncodedOutputStream` is similar, but it has a `bool putBOM` parameter in the constructor, controlling whether to write a BOM into the output byte stream. + +~~~~~~~~~~cpp +#include "rapidjson/filewritestream.h" // FileWriteStream +#include "rapidjson/encodedstream.h" // EncodedOutputStream +#include <cstdio> + +Document d; // Document is GenericDocument<UTF8<> > +// ...
+ +FILE* fp = fopen("output_utf32le.json", "wb"); // non-Windows use "w" + +char writeBuffer[256]; +FileWriteStream bos(fp, writeBuffer, sizeof(writeBuffer)); + +typedef EncodedOutputStream<UTF32LE<>, FileWriteStream> OutputStream; +OutputStream eos(bos, true); // Write BOM + +Writer<OutputStream, UTF32LE<>, UTF8<>> writer(eos); +d.Accept(writer); // This generates UTF32-LE file from UTF-8 in memory + +fclose(fp); +~~~~~~~~~~ + +## AutoUTFInputStream {#AutoUTFInputStream} + +Sometimes an application may want to handle all supported JSON encodings. `AutoUTFInputStream` first tries to detect the encoding from the BOM. If no BOM is available, it uses characteristics of valid JSON to detect the encoding. If neither method succeeds, it falls back to the UTF type provided in the constructor. + +Since the characters (code units) may be 8-bit, 16-bit or 32-bit, `AutoUTFInputStream` requires a character type which can hold at least 32 bits. We may use `unsigned`, as in the template parameter: + +~~~~~~~~~~cpp +#include "rapidjson/document.h" +#include "rapidjson/filereadstream.h" // FileReadStream +#include "rapidjson/encodedstream.h" // AutoUTFInputStream +#include <cstdio> + +using namespace rapidjson; + +FILE* fp = fopen("any.json", "rb"); // non-Windows use "r" + +char readBuffer[256]; +FileReadStream bis(fp, readBuffer, sizeof(readBuffer)); + +AutoUTFInputStream<unsigned, FileReadStream> eis(bis); // wraps bis into eis + +Document d; // Document is GenericDocument<UTF8<> > +d.ParseStream<0, AutoUTF<unsigned> >(eis); // This parses any UTF file into UTF-8 in memory + +fclose(fp); +~~~~~~~~~~ + +When specifying the encoding of the stream, use `AutoUTF<CharType>` as in `ParseStream()` above. + +You can obtain the UTF type via `UTFType GetType()`, and check whether a BOM was found via `HasBOM()`. + +## AutoUTFOutputStream {#AutoUTFOutputStream} + +Similarly, to choose the output encoding at runtime, we can use `AutoUTFOutputStream`. This class is not automatic *per se*.
You need to specify the UTF type and whether to write a BOM at runtime. + +~~~~~~~~~~cpp +using namespace rapidjson; + +void WriteJSONFile(FILE* fp, UTFType type, bool putBOM, const Document& d) { + char writeBuffer[256]; + FileWriteStream bos(fp, writeBuffer, sizeof(writeBuffer)); + + typedef AutoUTFOutputStream<unsigned, FileWriteStream> OutputStream; + OutputStream eos(bos, type, putBOM); + + Writer<OutputStream, UTF8<>, AutoUTF<> > writer; + d.Accept(writer); +} +~~~~~~~~~~ + +`AutoUTFInputStream` and `AutoUTFOutputStream` are more convenient than `EncodedInputStream` and `EncodedOutputStream`. They incur only a small runtime overhead. + +# Custom Stream {#CustomStream} + +In addition to memory/file streams, users can create their own stream classes which fit RapidJSON's API. For example, you may create a network stream, a stream from a compressed file, etc. + +RapidJSON combines different types using templates. A class providing the entire required interface can be a stream. The Stream interface is defined in comments of `rapidjson/rapidjson.h`: + +~~~~~~~~~~cpp +concept Stream { + typename Ch; //!< Character type of the stream. + + //! Read the current character from stream without moving the read cursor. + Ch Peek() const; + + //! Read the current character from stream and moving the read cursor to next character. + Ch Take(); + + //! Get the current read cursor. + //! \return Number of characters read from start. + size_t Tell(); + + //! Begin writing operation at the current read pointer. + //! \return The begin writer pointer. + Ch* PutBegin(); + + //! Write a character. + void Put(Ch c); + + //! Flush the buffer. + void Flush(); + + //! End the writing operation. + //! \param begin The begin write pointer returned by PutBegin(). + //! \return Number of characters written. + size_t PutEnd(Ch* begin); +} +~~~~~~~~~~ + +Input streams must implement `Peek()`, `Take()` and `Tell()`. +Output streams must implement `Put()` and `Flush()`.
+There are two special interface functions, `PutBegin()` and `PutEnd()`, which are only for *in situ* parsing. Normal streams do not implement them. However, even if an interface function is not needed for a particular stream, the stream still needs to provide a dummy implementation; otherwise a compilation error will be generated. + +## Example: istream wrapper {#ExampleIStreamWrapper} + +The following example is a simple wrapper of `std::istream`, which only implements 3 functions. + +~~~~~~~~~~cpp +class MyIStreamWrapper { +public: + typedef char Ch; + + MyIStreamWrapper(std::istream& is) : is_(is) { + } + + Ch Peek() const { // 1 + int c = is_.peek(); + return c == std::char_traits<char>::eof() ? '\0' : (Ch)c; + } + + Ch Take() { // 2 + int c = is_.get(); + return c == std::char_traits<char>::eof() ? '\0' : (Ch)c; + } + + size_t Tell() const { return (size_t)is_.tellg(); } // 3 + + Ch* PutBegin() { assert(false); return 0; } + void Put(Ch) { assert(false); } + void Flush() { assert(false); } + size_t PutEnd(Ch*) { assert(false); return 0; } + +private: + MyIStreamWrapper(const MyIStreamWrapper&); + MyIStreamWrapper& operator=(const MyIStreamWrapper&); + + std::istream& is_; +}; +~~~~~~~~~~ + +Users can use it to wrap instances of `std::stringstream` and `std::ifstream`. + +~~~~~~~~~~cpp +const char* json = "[1,2,3,4]"; +std::stringstream ss(json); +MyIStreamWrapper is(ss); + +Document d; +d.ParseStream(is); +~~~~~~~~~~ + +Note that this implementation may not be as efficient as RapidJSON's memory or file streams, due to internal overheads of the standard library. + +## Example: ostream wrapper {#ExampleOStreamWrapper} + +The following example is a simple wrapper of `std::ostream`, which only implements 2 functions.
+ +~~~~~~~~~~cpp +class MyOStreamWrapper { +public: + typedef char Ch; + + MyOStreamWrapper(std::ostream& os) : os_(os) { + } + + Ch Peek() const { assert(false); return '\0'; } + Ch Take() { assert(false); return '\0'; } + size_t Tell() const { } + + Ch* PutBegin() { assert(false); return 0; } + void Put(Ch c) { os_.put(c); } // 1 + void Flush() { os_.flush(); } // 2 + size_t PutEnd(Ch*) { assert(false); return 0; } + +private: + MyOStreamWrapper(const MyOStreamWrapper&); + MyOStreamWrapper& operator=(const MyOStreamWrapper&); + + std::ostream& os_; +}; +~~~~~~~~~~ + +Users can use it to wrap instances of `std::stringstream` and `std::ofstream`. + +~~~~~~~~~~cpp +Document d; +// ... + +std::stringstream ss; +MyOStreamWrapper os(ss); + +Writer<MyOStreamWrapper> writer(os); +d.Accept(writer); +~~~~~~~~~~ + +Note that this implementation may not be as efficient as RapidJSON's memory or file streams, due to internal overheads of the standard library. + +# Summary {#Summary} + +This section describes the stream classes available in RapidJSON. Memory streams are simple. File streams can reduce the memory required during JSON parsing and generation, if the JSON is stored in a file system. Encoded streams convert between byte streams and character streams. Finally, users may create custom streams using a simple interface.