This is an automated email from the ASF dual-hosted git repository. mgrigorov pushed a commit to branch avro-3599-make-apache-avro-test-helper-releasable in repository https://gitbox.apache.org/repos/asf/avro.git
commit 2c222e18f4ecace04a87b096410c9fba553b3be2 Author: Martin Tzvetanov Grigorov <[email protected]> AuthorDate: Mon Aug 8 21:39:33 2022 +0300 AVRO-3599: Rust: Make apache-avro-test-helper releasable Signed-off-by: Martin Tzvetanov Grigorov <[email protected]> --- lang/rust/Cargo.lock | 116 +++---- lang/rust/README.md | 637 +--------------------------------- lang/rust/avro/Cargo.toml | 2 +- lang/rust/{ => avro}/README.md | 0 lang/rust/avro_test_helper/Cargo.toml | 13 +- 5 files changed, 67 insertions(+), 701 deletions(-) diff --git a/lang/rust/Cargo.lock b/lang/rust/Cargo.lock index 2bc3d2e42..41c4c82a7 100644 --- a/lang/rust/Cargo.lock +++ b/lang/rust/Cargo.lock @@ -90,7 +90,7 @@ dependencies = [ [[package]] name = "apache-avro-test-helper" -version = "0.1.0" +version = "0.14.0" dependencies = [ "color-backtrace", "ctor", @@ -119,9 +119,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "backtrace" -version = "0.3.65" +version = "0.3.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11a17d453482a265fd5f8479f2a3f405566e6ca627837aaddb85af8b1ab8ef61" +checksum = "cab84319d616cfb654d03394f38ab7e6f0919e181b1b57e1fd15e7fb4077d9a7" dependencies = [ "addr2line", "cc", @@ -192,15 +192,6 @@ dependencies = [ "pkg-config", ] -[[package]] -name = "cast" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a" -dependencies = [ - "rustc_version", -] - [[package]] name = "cast" version = "0.3.0" @@ -279,7 +270,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" dependencies = [ "atty", - "cast 0.3.0", + "cast", "clap", "criterion-plot", "csv", @@ -300,19 +291,19 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" +checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" dependencies = [ - "cast 0.2.7", + "cast", "itertools", ] [[package]] name = "crossbeam-channel" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c02a4d71819009c192cf4872265391563fd6a84c81ff2c0f2a7026ca4c1d85c" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" dependencies = [ "cfg-if", "crossbeam-utils", @@ -320,9 +311,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -331,9 +322,9 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07db9d94cbd326813772c968ccd25999e5f8ae22f4f8d1b11effa37ef6ce281d" +checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" dependencies = [ "autocfg", "cfg-if", @@ -345,9 +336,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.10" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83" +checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" dependencies = [ "cfg-if", "once_cell", @@ -355,9 +346,9 @@ dependencies = [ [[package]] name = "crypto-common" -version = "0.1.3" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" dependencies = [ "generic-array", "typenum", @@ -431,9 +422,9 @@ dependencies = [ [[package]] name = "diff" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499" +checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "digest" @@ -447,9 +438,9 @@ dependencies = [ [[package]] name = "either" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be" [[package]] name = "env_logger" @@ -468,9 +459,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "generic-array" -version = "0.14.5" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" +checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" dependencies = [ "typenum", "version_check", @@ -489,9 +480,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78cc372d058dcf6d5ecd98510e7fbc9e5aec4d21de70f65fea8fecebcd881bd4" +checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d" [[package]] name = "half" @@ -554,9 +545,9 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" [[package]] name = "itoa" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" +checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" [[package]] name = "jobserver" @@ -584,9 +575,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.126" +version = "0.2.127" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +checksum = "505e71a4706fa491e9b1b55f51b95d4037d0821ee40131190475f692b35b009b" [[package]] name = "libflate" @@ -703,18 +694,18 @@ dependencies = [ [[package]] name = "object" -version = "0.28.4" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e42c982f2d955fac81dd7e1d0e1426a7d702acd9c98d19ab01083a6a0328c424" +checksum = "21158b2c33aa6d4561f1c0a6ea283ca92bc54802a93b263e910746d679a7eb53" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" +checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" [[package]] name = "oorandom" @@ -752,15 +743,15 @@ dependencies = [ [[package]] name = "plotters-backend" -version = "0.3.2" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d88417318da0eaf0fdcdb51a0ee6c3bed624333bff8f946733049380be67ac1c" +checksum = "193228616381fecdc1224c62e96946dfbc73ff4384fba576e052ff8c1bea8142" [[package]] name = "plotters-svg" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9" +checksum = "e0918736323d1baff32ee0eade54984f6f201ad7e97d5cfb5d6ab4a358529615" dependencies = [ "plotters-backend", ] @@ -932,26 +923,17 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" -[[package]] -name = "rustc_version" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" -dependencies = [ - "semver", -] - [[package]] name = "rustversion" -version = "1.0.7" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf" +checksum = "97477e48b4cf8603ad5f7aaf897467cf42ab4218a38ef76fb14c2d6773a6d6a8" [[package]] name = "ryu" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" [[package]] name = "same-file" @@ -974,12 +956,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "semver" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41d061efea015927ac527063765e73601444cdc344ba855bc7bd44578b25e1c" - [[package]] name = "serde" version = "1.0.142" @@ -1016,7 +992,7 @@ version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38dd04e3c8279e75b31ef29dbdceebfe5ad89f4d0937213c53f7d49d01b3d5a7" dependencies = [ - "itoa 1.0.2", + "itoa 1.0.3", "ryu", "serde", ] @@ -1147,9 +1123,9 @@ checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" [[package]] name = "unicode-ident" -version = "1.0.1" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" +checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" [[package]] name = "unicode-width" @@ -1287,9 +1263,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.58" +version = "0.3.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90" +checksum = "ed055ab27f941423197eb86b2035720b1a3ce40504df082cac2ecc6ed73335a1" dependencies = [ "js-sys", "wasm-bindgen", diff --git a/lang/rust/README.md b/lang/rust/README.md index 965dd51ab..199154393 100644 --- a/lang/rust/README.md +++ b/lang/rust/README.md @@ -19,640 +19,23 @@ # apache-avro -[](https://crates.io/crates/apache-avro) -[](https://github.com/apache/avro/actions/workflows/test-lang-rust-ci.yml) -[](https://docs.rs/apache-avro) -[](https://github.com/apache/avro/blob/master/LICENSE.txt) +Apache Avro Rust SDK -A library for working with [Apache Avro](https://avro.apache.org/) in Rust language. +# Subprojects -Please check our [documentation](https://docs.rs/apache-avro) for examples, tutorials and API reference. +## Avro -**[Apache Avro](https://avro.apache.org/)** is a data serialization system which provides rich -data structures and a compact, fast, binary data format. +See [avro/README.md](./avro/README.md) -All data in Avro is schematized, as in the following example: +## Avro derive -``` -{ - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"} - ] -} -``` +See [avro_derive/README.md](./avro_derive/README.md) -There are basically two ways of handling Avro data in Rust: +## Avro test helper -* **as Avro-specialized data types** based on an Avro schema; -* **as generic Rust serde-compatible types** implementing/deriving `Serialize` and -`Deserialize`; +See [avro_test_helper/README.md](./avro_test_helper/README.md) -**apache-avro** provides a way to read and write both these data representations easily and -efficiently. +## WebAssembly demo application -## Installing the library +See [wasm-demo/README.md](./wasm-demo/README.md) - -Add to your `Cargo.toml`: - -```toml -[dependencies] -apache-avro = "x.y" -``` - -Or in case you want to leverage the **Snappy** codec: - -```toml -[dependencies.apache-avro] -version = "x.y" -features = ["snappy"] -``` - -Or in case you want to leverage the **Zstandard** codec: - -```toml -[dependencies.apache-avro] -version = "x.y" -features = ["zstandard"] -``` - -Or in case you want to leverage the **Bzip2** codec: - -```toml -[dependencies.apache-avro] -version = "x.y" -features = ["bzip"] -``` - -Or in case you want to leverage the **Xz** codec: - -```toml -[dependencies.apache-avro] -version = "x.y" -features = ["xz"] -``` - - -## Upgrading to a newer minor version - -The library is still in beta, so there might be backward-incompatible changes between minor -versions. If you have troubles upgrading, check the [version upgrade guide](migration_guide.md). - -## Defining a schema - -An Avro data cannot exist without an Avro schema. Schemas **must** be used while writing and -**can** be used while reading and they carry the information regarding the type of data we are -handling. Avro schemas are used for both schema validation and resolution of Avro data. - -Avro schemas are defined in **JSON** format and can just be parsed out of a raw string: - -```rust -use apache_avro::Schema; - -let raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"} - ] - } -"#; - -// if the schema is not valid, this function will return an error -let schema = Schema::parse_str(raw_schema).unwrap(); - -// schemas can be printed for debugging -println!("{:?}", schema); -``` - -Additionally, a list of of definitions (which may depend on each other) can be given and all of -them will be parsed into the corresponding schemas. - -```rust -use apache_avro::Schema; - -let raw_schema_1 = r#"{ - "name": "A", - "type": "record", - "fields": [ - {"name": "field_one", "type": "float"} - ] - }"#; - -// This definition depends on the definition of A above -let raw_schema_2 = r#"{ - "name": "B", - "type": "record", - "fields": [ - {"name": "field_one", "type": "A"} - ] - }"#; - -// if the schemas are not valid, this function will return an error -let schemas = Schema::parse_list(&[raw_schema_1, raw_schema_2]).unwrap(); - -// schemas can be printed for debugging -println!("{:?}", schemas); -``` -*N.B.* It is important to note that the composition of schema definitions requires schemas with names. -For this reason, only schemas of type Record, Enum, and Fixed should be input into this function. - -The library provides also a programmatic interface to define schemas without encoding them in -JSON (for advanced use), but we highly recommend the JSON interface. Please read the API -reference in case you are interested. - -For more information about schemas and what kind of information you can encapsulate in them, -please refer to the appropriate section of the -[Avro Specification](https://avro.apache.org/docs/current/spec.html#schemas). - -## Writing data - -Once we have defined a schema, we are ready to serialize data in Avro, validating them against -the provided schema in the process. As mentioned before, there are two ways of handling Avro -data in Rust. - -**NOTE:** The library also provides a low-level interface for encoding a single datum in Avro -bytecode without generating markers and headers (for advanced use), but we highly recommend the -`Writer` interface to be totally Avro-compatible. Please read the API reference in case you are -interested. - -### The avro way - -Given that the schema we defined above is that of an Avro *Record*, we are going to use the -associated type provided by the library to specify the data we want to serialize: - -```rust -use apache_avro::types::Record; -use apache_avro::Writer; -# -// a writer needs a schema and something to write to -let mut writer = Writer::new(&schema, Vec::new()); - -// the Record type models our Record schema -let mut record = Record::new(writer.schema()).unwrap(); -record.put("a", 27i64); -record.put("b", "foo"); - -// schema validation happens here -writer.append(record).unwrap(); - -// this is how to get back the resulting avro bytecode -// this performs a flush operation to make sure data has been written, so it can fail -// you can also call `writer.flush()` yourself without consuming the writer -let encoded = writer.into_inner().unwrap(); -``` - -The vast majority of the times, schemas tend to define a record as a top-level container -encapsulating all the values to convert as fields and providing documentation for them, but in -case we want to directly define an Avro value, the library offers that capability via the -`Value` interface. - -```rust -use apache_avro::types::Value; - -let mut value = Value::String("foo".to_string()); -``` - -### The serde way - -Given that the schema we defined above is an Avro *Record*, we can directly use a Rust struct -deriving `Serialize` to model our data: - -```rust -use apache_avro::Writer; - -#[derive(Debug, Serialize)] -struct Test { - a: i64, - b: String, -} - -// a writer needs a schema and something to write to -let mut writer = Writer::new(&schema, Vec::new()); - -// the structure models our Record schema -let test = Test { - a: 27, - b: "foo".to_owned(), -}; - -// schema validation happens here -writer.append_ser(test).unwrap(); - -// this is how to get back the resulting avro bytecode -// this performs a flush operation to make sure data is written, so it can fail -// you can also call `writer.flush()` yourself without consuming the writer -let encoded = writer.into_inner(); -``` - -The vast majority of the times, schemas tend to define a record as a top-level container -encapsulating all the values to convert as fields and providing documentation for them, but in -case we want to directly define an Avro value, any type implementing `Serialize` should work. - -```rust -let mut value = "foo".to_string(); -``` - -### Using codecs to compress data - -Avro supports three different compression codecs when encoding data: - -* **Null**: leaves data uncompressed; -* **Deflate**: writes the data block using the deflate algorithm as specified in RFC 1951, and -typically implemented using the zlib library. Note that this format (unlike the "zlib format" in -RFC 1950) does not have a checksum. -* **Snappy**: uses Google's [Snappy](http://google.github.io/snappy/) compression library. Each -compressed block is followed by the 4-byte, big-endianCRC32 checksum of the uncompressed data in -the block. You must enable the `snappy` feature to use this codec. -* **Zstandard**: uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library. -You must enable the `zstandard` feature to use this codec. -* **Bzip2**: uses [BZip2](https://sourceware.org/bzip2/) compression library. -You must enable the `bzip` feature to use this codec. -* **Xz**: uses [xz2](https://github.com/alexcrichton/xz2-rs) compression library. - You must enable the `xz` feature to use this codec. - - -To specify a codec to use to compress data, just specify it while creating a `Writer`: -```rust -use apache_avro::Writer; -use apache_avro::Codec; -# -let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); -``` - -## Reading data - -As far as reading Avro encoded data goes, we can just use the schema encoded with the data to -read them. The library will do it automatically for us, as it already does for the compression -codec: - -```rust -use apache_avro::Reader; -# -// reader creation can fail in case the input to read from is not Avro-compatible or malformed -let reader = Reader::new(&input[..]).unwrap(); -``` - -In case, instead, we want to specify a different (but compatible) reader schema from the schema -the data has been written with, we can just do as the following: -```rust -use apache_avro::Schema; -use apache_avro::Reader; -# - -let reader_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - {"name": "c", "type": "long", "default": 43} - ] - } -"#; - -let reader_schema = Schema::parse_str(reader_raw_schema).unwrap(); - -// reader creation can fail in case the input to read from is not Avro-compatible or malformed -let reader = Reader::with_schema(&reader_schema, &input[..]).unwrap(); -``` - -The library will also automatically perform schema resolution while reading the data. - -For more information about schema compatibility and resolution, please refer to the -[Avro Specification](https://avro.apache.org/docs/current/spec.html#schemas). - -As usual, there are two ways to handle Avro data in Rust, as you can see below. - -**NOTE:** The library also provides a low-level interface for decoding a single datum in Avro -bytecode without markers and header (for advanced use), but we highly recommend the `Reader` -interface to leverage all Avro features. Please read the API reference in case you are -interested. - - -### The avro way - -We can just read directly instances of `Value` out of the `Reader` iterator: - -```rust -use apache_avro::Reader; -# -let reader = Reader::new(&input[..]).unwrap(); - -// value is a Result of an Avro Value in case the read operation fails -for value in reader { - println!("{:?}", value.unwrap()); -} - -``` - -### The serde way - -Alternatively, we can use a Rust type implementing `Deserialize` and representing our schema to -read the data into: - -```rust -use apache_avro::Reader; -use apache_avro::from_value; - -#[derive(Debug, Deserialize)] -struct Test { - a: i64, - b: String, -} - -let reader = Reader::new(&input[..]).unwrap(); - -// value is a Result in case the read operation fails -for value in reader { - println!("{:?}", from_value::<Test>(&value.unwrap())); -} -``` - -## Putting everything together - -The following is an example of how to combine everything showed so far and it is meant to be a -quick reference of the library interface: - -```rust -use apache_avro::{Codec, Reader, Schema, Writer, from_value, types::Record, Error}; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Deserialize, Serialize)] -struct Test { - a: i64, - b: String, -} - -fn main() -> Result<(), Error> { - let raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"} - ] - } - "#; - - let schema = Schema::parse_str(raw_schema)?; - - println!("{:?}", schema); - - let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); - - let mut record = Record::new(writer.schema()).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - - writer.append(record)?; - - let test = Test { - a: 27, - b: "foo".to_owned(), - }; - - writer.append_ser(test)?; - - let input = writer.into_inner()?; - let reader = Reader::with_schema(&schema, &input[..])?; - - for record in reader { - println!("{:?}", from_value::<Test>(&record?)); - } - Ok(()) -} -``` - -`apache-avro` also supports the logical types listed in the [Avro specification](https://avro.apache.org/docs/current/spec.html#Logical+Types): - -1. `Decimal` using the [`num_bigint`](https://docs.rs/num-bigint/0.2.6/num_bigint) crate -1. UUID using the [`uuid`](https://docs.rs/uuid/0.8.1/uuid) crate -1. Date, Time (milli) as `i32` and Time (micro) as `i64` -1. Timestamp (milli and micro) as `i64` -1. Duration as a custom type with `months`, `days` and `millis` accessor methods each of which returns an `i32` - -Note that the on-disk representation is identical to the underlying primitive/complex type. - -#### Read and write logical types - -```rust -use apache_avro::{ - types::Record, types::Value, Codec, Days, Decimal, Duration, Millis, Months, Reader, Schema, - Writer, Error, -}; -use num_bigint::ToBigInt; - -fn main() -> Result<(), Error> { - let raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - { - "name": "decimal_fixed", - "type": { - "type": "fixed", - "size": 2, - "name": "decimal" - }, - "logicalType": "decimal", - "precision": 4, - "scale": 2 - }, - { - "name": "decimal_var", - "type": "bytes", - "logicalType": "decimal", - "precision": 10, - "scale": 3 - }, - { - "name": "uuid", - "type": "string", - "logicalType": "uuid" - }, - { - "name": "date", - "type": "int", - "logicalType": "date" - }, - { - "name": "time_millis", - "type": "int", - "logicalType": "time-millis" - }, - { - "name": "time_micros", - "type": "long", - "logicalType": "time-micros" - }, - { - "name": "timestamp_millis", - "type": "long", - "logicalType": "timestamp-millis" - }, - { - "name": "timestamp_micros", - "type": "long", - "logicalType": "timestamp-micros" - }, - { - "name": "duration", - "type": { - "type": "fixed", - "size": 12, - "name": "duration" - }, - "logicalType": "duration" - } - ] - } - "#; - - let schema = Schema::parse_str(raw_schema)?; - - println!("{:?}", schema); - - let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); - - let mut record = Record::new(writer.schema()).unwrap(); - record.put("decimal_fixed", Decimal::from(9936.to_bigint().unwrap().to_signed_bytes_be())); - record.put("decimal_var", Decimal::from((-32442.to_bigint().unwrap()).to_signed_bytes_be())); - record.put("uuid", uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000").unwrap()); - record.put("date", Value::Date(1)); - record.put("time_millis", Value::TimeMillis(2)); - record.put("time_micros", Value::TimeMicros(3)); - record.put("timestamp_millis", Value::TimestampMillis(4)); - record.put("timestamp_micros", Value::TimestampMicros(5)); - record.put("duration", Duration::new(Months::new(6), Days::new(7), Millis::new(8))); - - writer.append(record)?; - - let input = writer.into_inner()?; - let reader = Reader::with_schema(&schema, &input[..])?; - - for record in reader { - println!("{:?}", record?); - } - Ok(()) -} -``` - -### Calculate Avro schema fingerprint - -This library supports calculating the following fingerprints: - - - SHA-256 - - MD5 - - Rabin - -An example of fingerprinting for the supported fingerprints: - -```rust -use apache_avro::rabin::Rabin; -use apache_avro::{Schema, Error}; -use md5::Md5; -use sha2::Sha256; - -fn main() -> Result<(), Error> { - let raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"} - ] - } - "#; - let schema = Schema::parse_str(raw_schema)?; - println!("{}", schema.fingerprint::<Sha256>()); - println!("{}", schema.fingerprint::<Md5>()); - println!("{}", schema.fingerprint::<Rabin>()); - Ok(()) -} -``` - -### Ill-formed data - -In order to ease decoding, the Binary Encoding specification of Avro data -requires some fields to have their length encoded alongside the data. - -If encoded data passed to a `Reader` has been ill-formed, it can happen that -the bytes meant to contain the length of data are bogus and could result -in extravagant memory allocation. - -To shield users from ill-formed data, `apache-avro` sets a limit (default: 512MB) -to any allocation it will perform when decoding data. - -If you expect some of your data fields to be larger than this limit, be sure -to make use of the `max_allocation_bytes` function before reading **any** data -(we leverage Rust's [`std::sync::Once`](https://doc.rust-lang.org/std/sync/struct.Once.html) -mechanism to initialize this value, if -any call to decode is made before a call to `max_allocation_bytes`, the limit -will be 512MB throughout the lifetime of the program). - - -```rust -use apache_avro::max_allocation_bytes; - -max_allocation_bytes(2 * 1024 * 1024 * 1024); // 2GB - -// ... happily decode large data - -``` - -### Check schemas compatibility - -This library supports checking for schemas compatibility. - -Examples of checking for compatibility: - -1. Compatible schemas - -Explanation: an int array schema can be read by a long array schema- an int -(32bit signed integer) fits into a long (64bit signed integer) - -```rust -use apache_avro::{Schema, schema_compatibility::SchemaCompatibility}; - -let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap(); -let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap(); -assert_eq!(true, SchemaCompatibility::can_read(&writers_schema, &readers_schema)); -``` - -2. Incompatible schemas (a long array schema cannot be read by an int array schema) - -Explanation: a long array schema cannot be read by an int array schema- a -long (64bit signed integer) does not fit into an int (32bit signed integer) - -```rust -use apache_avro::{Schema, schema_compatibility::SchemaCompatibility}; - -let writers_schema = Schema::parse_str(r#"{"type": "array", "items":"long"}"#).unwrap(); -let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).unwrap(); -assert_eq!(false, SchemaCompatibility::can_read(&writers_schema, &readers_schema)); -``` - -## Minimal supported Rust version - -1.54.0 - -## License -This project is licensed under [Apache License 2.0](https://github.com/apache/avro/blob/master/LICENSE.txt). - -## Contributing -Everyone is encouraged to contribute! You can contribute by forking the GitHub repo and making a pull request or opening an issue. -All contributions will be licensed under [Apache License 2.0](https://github.com/apache/avro/blob/master/LICENSE.txt). - -Please consider adding documentation and tests! -If you introduce a backward-incompatible change, please consider adding instruction to migrate in the [Migration Guide](migration_guide.md) -If you modify the crate documentation in `lib.rs`, run `make readme` to sync the README file. diff --git a/lang/rust/avro/Cargo.toml b/lang/rust/avro/Cargo.toml index d1d0292a3..f47f8f97b 100644 --- a/lang/rust/avro/Cargo.toml +++ b/lang/rust/avro/Cargo.toml @@ -83,7 +83,7 @@ rand = { default-features = false, version = "0.8.5", features = ["default"] } [dev-dependencies] anyhow = { default-features = false, version = "1.0.60", features = ["std"] } -apache-avro-test-helper = { default-features = false, version = "0.1.0", path = "../avro_test_helper" } +apache-avro-test-helper = { default-features = false, version = "0.14.0", path = "../avro_test_helper" } criterion = { default-features = false, version = "0.3.6" } hex-literal = { default-features = false, version = "0.3.4" } md-5 = { default-features = false, version = "0.10.1" } diff --git a/lang/rust/README.md b/lang/rust/avro/README.md similarity index 100% copy from lang/rust/README.md copy to lang/rust/avro/README.md diff --git a/lang/rust/avro_test_helper/Cargo.toml b/lang/rust/avro_test_helper/Cargo.toml index 0e77a5d55..4f3cdf02a 100644 --- a/lang/rust/avro_test_helper/Cargo.toml +++ b/lang/rust/avro_test_helper/Cargo.toml @@ -17,10 +17,17 @@ [package] name = "apache-avro-test-helper" -version = "0.1.0" +version = "0.14.0" edition = "2018" -publish = false -description = "Avro test helper. This crate is not supposed to be published!" +description = "Apache Avro tests helper." +authors = ["Apache Avro team <[email protected]>"] +license = "Apache-2.0" +readme = "README.md" +repository = "https://github.com/apache/avro" +keywords = ["avro", "data", "serialization", "test"] +categories = ["encoding"] +documentation = "https://docs.rs/apache-avro-test-helper" + [dependencies] color-backtrace = { default-features = false, version = "0.5.1" }
