This is an automated email from the git hooks/post-receive script. nickm-guest pushed a commit to branch master in repository libcbor-xs-perl.
commit 439f260710caad832debfb0a9635630a385fa829 Author: Nick Morrott <knowledgejun...@gmail.com> Date: Wed May 25 19:37:27 2016 +0100 Imported Upstream version 1.50 --- Changes | 22 +++++++ MANIFEST | 1 + META.json | 5 +- META.yml | 5 +- Makefile.PL | 3 + README | 114 +++++++++++++++++++++++++++++----- XS.pm | 192 ++++++++++++++++++++++++++++++++++++++++------------------ XS.xs | 52 +++++++++++++--- t/53_bignum.t | 25 +++++++- t/58_hv.t | 70 +++++++++++++++++++++ 10 files changed, 398 insertions(+), 91 deletions(-) diff --git a/Changes b/Changes index c621b57..65274cd 100644 --- a/Changes +++ b/Changes @@ -5,6 +5,28 @@ TODO: document encode_cbor_sharing? TODO: weaken cyclic structures? TODO: allowed_classes or so? +1.5 Wed Apr 27 11:38:39 CEST 2016 + - Math::BigFloat madness workaround, see + http://blog.schmorp.de/2016-04-23-mathbigfloat-maintainer-fail.html + (bugreport by z...@softvisio.net). + - add text_keys and text_strings options to force CBOR text encoding + for perl hash keys or all strings, as a result of discussions + with Fredrik Ljunggren. + - implement support for arbitrary-exponent numbers (see + http://peteroupc.github.io/CBOR/bigfrac.html, tags 264 and 265) + for both en- and decoding. + - implement support for rational numbers (see + http://peteroupc.github.io/CBOR/rational.html, tag 30) for both + en- and decoding. + - the above effectively implements all registered CBOR extensions + in a sensible manner. + - remove some weird dead code that was duplicated (%FILTER). + - add t/58_hv.t, which tests hashes and the new text_* flags. + hashes apparently were not encoded at all in any of the existing + tests. + - document Math::BigFloat base-2 performance/crash issues. + - use stability canary. + 1.41 Thu 25 Feb 15:22:03 CET 2016 - avoid perl panics on nested FREEZE/THAW calls (testcase by Victor Efimov). diff --git a/MANIFEST b/MANIFEST index d2ba32e..44caa1a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -16,6 +16,7 @@ t/54_sharing.t t/55_utf8.t t/56_filter.t t/57_incr.t +t/58_hv.t t/99_binary.t META.yml Module YAML meta-data (added by MakeMaker) META.json Module JSON meta-data (added by MakeMaker) diff --git a/META.json b/META.json index 6f0bce4..28d10a3 100644 --- a/META.json +++ b/META.json @@ -27,7 +27,8 @@ }, "configure" : { "requires" : { - "ExtUtils::MakeMaker" : "0" + "Canary::Stability" : "0", + "ExtUtils::MakeMaker" : "6.52" } }, "runtime" : { @@ -38,5 +39,5 @@ } }, "release_status" : "stable", - "version" : 1.41 + "version" : 1.5 } diff --git a/META.yml b/META.yml index bba02ce..6a8cc94 100644 --- a/META.yml +++ b/META.yml @@ -5,7 +5,8 @@ author: build_requires: ExtUtils::MakeMaker: '0' configure_requires: - ExtUtils::MakeMaker: '0' + Canary::Stability: '0' + ExtUtils::MakeMaker: '6.52' dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 7.1, CPAN::Meta::Converter version 2.150001' license: unknown @@ -20,4 +21,4 @@ no_index: requires: Types::Serialiser: '0' common::sense: '0' -version: 1.41 +version: 1.5 diff --git a/Makefile.PL b/Makefile.PL index 31f6ed9..0cdcfe0 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -1,6 +1,8 @@ use 5.010001; # for utf-8, and Time::Piece use ExtUtils::MakeMaker; +use Canary::Stability CBOR::XS => 1, 5.010001; + WriteMakefile( dist => { PREOP => 'pod2text XS.pm | tee README >$(DISTVNAME)/README; chmod -R u=rwX,go=rX . ;', @@ -13,5 +15,6 @@ WriteMakefile( common::sense => 0, Types::Serialiser => 0, }, + CONFIGURE_REQUIRES => { ExtUtils::MakeMaker => 6.52, Canary::Stability => 0 }, ); diff --git a/README b/README index e35943c..45cb122 100644 --- a/README +++ b/README @@ -207,6 +207,49 @@ OBJECT-ORIENTED INTERFACE This option does not affect "decode" in any way - string references will always be decoded properly if present. + $cbor = $cbor->text_keys ([$enable]) + $enabled = $cbor->get_text_keys + If $enabled is true (or missing), then "encode" will encode all perl + hash keys as CBOR text strings/UTF-8 string, upgrading them as + needed. + + If $enable is false (the default), then "encode" will encode hash + keys normally - upgraded perl strings (strings internally encoded as + UTF-8) as CBOR text strings, and downgraded perl strings as CBOR + byte strings. + + This option does not affect "decode" in any way. + + This option is useful for interoperability with CBOR decoders that + don't treat byte strings as a form of text. It is especially useful + as Perl gives very little control over hash keys. + + Enabling this option can be slow, as all downgraded hash keys that + are encoded need to be scanned and converted to UTF-8. + + $cbor = $cbor->text_strings ([$enable]) + $enabled = $cbor->get_text_strings + This option works similar to "text_keys", above, but works on all + strings (including hash keys), so "text_keys" has no further effect + after enabling "text_strings". + + If $enabled is true (or missing), then "encode" will encode all perl + strings as CBOR text strings/UTF-8 strings, upgrading them as + needed. + + If $enable is false (the default), then "encode" will encode strings + normally (but see "text_keys") - upgraded perl strings (strings + internally encoded as UTF-8) as CBOR text strings, and downgraded + perl strings as CBOR byte strings. + + This option does not affect "decode" in any way. + + This option has similar advantages and disadvantages as "text_keys". + In addition, this option effectively removes the ability to encode + byte strings, which might break some "FREEZE" and "TO_CBOR" methods + that rely on this, such as bignum encoding, so this option is mainly + useful for very simple data. + $cbor = $cbor->validate_utf8 ([$enable]) $enabled = $cbor->get_validate_utf8 If $enable is true (or missing), then "decode" will validate that @@ -219,7 +262,7 @@ OBJECT-ORIENTED INTERFACE If $enable is false (the default), then "decode" will blindly accept UTF-8 data, marking them as valid UTF-8 in the resulting data - structure regardless of whether thats true or not. + structure regardless of whether that's true or not. Perl isn't too happy about corrupted UTF-8 in strings, but should generally not crash or do similarly evil things. Extensions might be @@ -411,7 +454,7 @@ MAPPING Perl hash references become CBOR maps. As there is no inherent ordering in hash keys (or CBOR maps), they will usually be encoded in a pseudo-random order. This order can be different each time a - hahs is encoded. + hash is encoded. Currently, tied hashes will use the indefinite-length format, while normal hashes will use the fixed-length format. @@ -470,15 +513,18 @@ MAPPING $x .= ""; # another, more awkward way to stringify print $x; # perl does it for you, too, quite often - You can force whether a string ie encoded as byte or text string by - using "utf8::upgrade" and "utf8::downgrade"): + You can force whether a string is encoded as byte or text string by + using "utf8::upgrade" and "utf8::downgrade" (if "text_strings" is + disabled): utf8::upgrade $x; # encode $x as text string utf8::downgrade $x; # encode $x as byte string Perl doesn't define what operations up- and downgrade strings, so if the difference between byte and text is important, you should up- or - downgrade your string as late as possible before encoding. + downgrade your string as late as possible before encoding. You can + also force the use of CBOR text strings by using "text_keys" or + "text_strings". You can force the type to be a CBOR number by numifying it: @@ -583,7 +629,6 @@ MAPPING sub URI::THAW { my ($class, $serialiser, $uri) = @_; - $class->new ($uri) } @@ -689,7 +734,7 @@ TAG HANDLING AND EXTENSIONS ENFORCED TAGS These tags are always handled when decoding, and their handling cannot - be overriden by the user. + be overridden by the user. 26 (perl-object, <http://cbor.schmorp.de/perl-object>) These tags are automatically created (and decoded) for serialisable @@ -722,8 +767,8 @@ TAG HANDLING AND EXTENSIONS 22098 (indirection, <http://cbor.schmorp.de/indirection>) This tag is automatically generated when a reference are encountered - (with the exception of hash and array refernces). It is converted to - a reference when decoding. + (with the exception of hash and array references). It is converted + to a reference when decoding. 55799 (self-describe CBOR, RFC 7049) This value is not generated on encoding (unless explicitly requested @@ -731,8 +776,8 @@ TAG HANDLING AND EXTENSIONS NON-ENFORCED TAGS These tags have default filters provided when decoding. Their handling - can be overriden by changing the %CBOR::XS::FILTER entry for the tag, or - by providing a custom "filter" callback when decoding. + can be overridden by changing the %CBOR::XS::FILTER entry for the tag, + or by providing a custom "filter" callback when decoding. When they result in decoding into a specific Perl class, the module usually provides a corresponding "TO_CBOR" method as well. @@ -757,15 +802,23 @@ TAG HANDLING AND EXTENSIONS "Math::BigInt::TO_CBOR" method encodes "small" bigints into normal CBOR integers, and others into positive/negative CBOR bignums. - 4, 5 (decimal fraction/bigfloat) + 4, 5, 264, 265 (decimal fraction/bigfloat) Both decimal fractions and bigfloats are decoded into Math::BigFloat objects. The corresponding "Math::BigFloat::TO_CBOR" method *always* - encodes into a decimal fraction. + encodes into a decimal fraction (either tag 4 or 264). - CBOR cannot represent bigfloats with *very* large exponents - - conversion of such big float objects is undefined. + NaN and infinities are not encoded properly, as they cannot be + represented in CBOR. - Also, NaN and infinities are not encoded properly. + See "BIGNUM SECURITY CONSIDERATIONS" for more info. + + 30 (rational numbers) + These tags are decoded into Math::BigRat objects. The corresponding + "Math::BigRat::TO_CBOR" method encodes rational numbers with + denominator 1 via their numerator only, i.e., they become normal + integers or "bignums". + + See "BIGNUM SECURITY CONSIDERATIONS" for more info. 21, 22, 23 (expected later JSON conversion) CBOR::XS is not a CBOR-to-JSON converter, and will simply ignore @@ -822,6 +875,35 @@ SECURITY CONSIDERATIONS information you might want to make sure that exceptions thrown by CBOR::XS will not end up in front of untrusted eyes. +BIGNUM SECURITY CONSIDERATIONS + CBOR::XS provides a "TO_CBOR" method for both Math::BigInt and + Math::BigFloat that tries to encode the number in the simplest possible + way, that is, either a CBOR integer, a CBOR bigint/decimal fraction (tag + 4) or an arbitrary-exponent decimal fraction (tag 264). Rational numbers + (Math::BigRat, tag 30) can also contain bignums as members. + + CBOR::XS will also understand base-2 bigfloat or arbitrary-exponent + bigfloats (tags 5 and 265), but it will never generate these on its own. + + Using the built-in Math::BigInt::Calc support, encoding and decoding + decimal fractions is generally fast. Decoding bigints can be slow for + very big numbers (tens of thousands of digits, something that could + potentially be caught by limiting the size of CBOR texts), and decoding + bigfloats or arbitrary-exponent bigfloats can be *extremely* slow + (minutes, decades) for large exponents (roughly 40 bit and longer). + + Additionally, Math::BigInt can take advantage of other bignum libraries, + such as Math::GMP, which cannot handle big floats with large exponents, + and might simply abort or crash your program, due to their code quality. + + This can be a concern if you want to parse untrusted CBOR. If it is, you + might want to disable decoding of tag 2 (bigint) and 3 (negative bigint) + types. You should also disable types 5 and 265, as these can be slow + even without bigints. + + Disabling bigints will also partially or fully disable types that rely + on them, e.g. rational numbers that use bignums. + CBOR IMPLEMENTATION NOTES This section contains some random implementation notes. They do not describe guaranteed behaviour, but merely behaviour as-is implemented diff --git a/XS.pm b/XS.pm index ffcf9f3..6877316 100644 --- a/XS.pm +++ b/XS.pm @@ -66,7 +66,7 @@ package CBOR::XS; use common::sense; -our $VERSION = 1.41; +our $VERSION = 1.5; our @ISA = qw(Exporter); our @EXPORT = qw(encode_cbor decode_cbor); @@ -249,6 +249,50 @@ the standard CBOR way. This option does not affect C<decode> in any way - string references will always be decoded properly if present. +=item $cbor = $cbor->text_keys ([$enable]) + +=item $enabled = $cbor->get_text_keys + +If C<$enabled> is true (or missing), then C<encode> will encode all +perl hash keys as CBOR text strings/UTF-8 string, upgrading them as needed. + +If C<$enable> is false (the default), then C<encode> will encode hash keys +normally - upgraded perl strings (strings internally encoded as UTF-8) as +CBOR text strings, and downgraded perl strings as CBOR byte strings. + +This option does not affect C<decode> in any way. + +This option is useful for interoperability with CBOR decoders that don't +treat byte strings as a form of text. It is especially useful as Perl +gives very little control over hash keys. + +Enabling this option can be slow, as all downgraded hash keys that are +encoded need to be scanned and converted to UTF-8. + +=item $cbor = $cbor->text_strings ([$enable]) + +=item $enabled = $cbor->get_text_strings + +This option works similar to C<text_keys>, above, but works on all strings +(including hash keys), so C<text_keys> has no further effect after +enabling C<text_strings>. + +If C<$enabled> is true (or missing), then C<encode> will encode all perl +strings as CBOR text strings/UTF-8 strings, upgrading them as needed. + +If C<$enable> is false (the default), then C<encode> will encode strings +normally (but see C<text_keys>) - upgraded perl strings (strings +internally encoded as UTF-8) as CBOR text strings, and downgraded perl +strings as CBOR byte strings. + +This option does not affect C<decode> in any way. + +This option has similar advantages and disadvantages as C<text_keys>. In +addition, this option effectively removes the ability to encode byte +strings, which might break some C<FREEZE> and C<TO_CBOR> methods that rely +on this, such as bignum encoding, so this option is mainly useful for very +simple data. + =item $cbor = $cbor->validate_utf8 ([$enable]) =item $enabled = $cbor->get_validate_utf8 @@ -263,7 +307,7 @@ of the official UTF-8. If C<$enable> is false (the default), then C<decode> will blindly accept UTF-8 data, marking them as valid UTF-8 in the resulting data structure -regardless of whether thats true or not. +regardless of whether that's true or not. Perl isn't too happy about corrupted UTF-8 in strings, but should generally not crash or do similarly evil things. Extensions might be not @@ -483,7 +527,7 @@ is meant by a perl value. Perl hash references become CBOR maps. As there is no inherent ordering in hash keys (or CBOR maps), they will usually be encoded in a pseudo-random -order. This order can be different each time a hahs is encoded. +order. This order can be different each time a hash is encoded. Currently, tied hashes will use the indefinite-length format, while normal hashes will use the fixed-length format. @@ -546,15 +590,16 @@ You can force the type to be a CBOR string by stringifying it: $x .= ""; # another, more awkward way to stringify print $x; # perl does it for you, too, quite often -You can force whether a string ie encoded as byte or text string by using -C<utf8::upgrade> and C<utf8::downgrade>): +You can force whether a string is encoded as byte or text string by using +C<utf8::upgrade> and C<utf8::downgrade> (if C<text_strings> is disabled): utf8::upgrade $x; # encode $x as text string utf8::downgrade $x; # encode $x as byte string Perl doesn't define what operations up- and downgrade strings, so if the difference between byte and text is important, you should up- or downgrade -your string as late as possible before encoding. +your string as late as possible before encoding. You can also force the +use of CBOR text strings by using C<text_keys> or C<text_strings>. You can force the type to be a CBOR number by numifying it: @@ -665,7 +710,6 @@ would be a possible implementation: sub URI::THAW { my ($class, $serialiser, $uri) = @_; - $class->new ($uri) } @@ -806,7 +850,7 @@ additional tags (such as base64url). =head2 ENFORCED TAGS These tags are always handled when decoding, and their handling cannot be -overriden by the user. +overridden by the user. =over 4 @@ -844,7 +888,7 @@ encoded, however, when C<pack_strings> is enabled. =item 22098 (indirection, L<http://cbor.schmorp.de/indirection>) This tag is automatically generated when a reference are encountered (with -the exception of hash and array refernces). It is converted to a reference +the exception of hash and array references). It is converted to a reference when decoding. =item 55799 (self-describe CBOR, RFC 7049) @@ -857,7 +901,7 @@ the user), and is simply ignored when decoding. =head2 NON-ENFORCED TAGS These tags have default filters provided when decoding. Their handling can -be overriden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by +be overridden by changing the C<%CBOR::XS::FILTER> entry for the tag, or by providing a custom C<filter> callback when decoding. When they result in decoding into a specific Perl class, the module @@ -885,16 +929,25 @@ These tags are decoded into L<Math::BigInt> objects. The corresponding C<Math::BigInt::TO_CBOR> method encodes "small" bigints into normal CBOR integers, and others into positive/negative CBOR bignums. -=item 4, 5 (decimal fraction/bigfloat) +=item 4, 5, 264, 265 (decimal fraction/bigfloat) Both decimal fractions and bigfloats are decoded into L<Math::BigFloat> objects. The corresponding C<Math::BigFloat::TO_CBOR> method I<always> -encodes into a decimal fraction. +encodes into a decimal fraction (either tag 4 or 264). -CBOR cannot represent bigfloats with I<very> large exponents - conversion -of such big float objects is undefined. +NaN and infinities are not encoded properly, as they cannot be represented +in CBOR. -Also, NaN and infinities are not encoded properly. +See L<BIGNUM SECURITY CONSIDERATIONS> for more info. + +=item 30 (rational numbers) + +These tags are decoded into L<Math::BigRat> objects. The corresponding +C<Math::BigRat::TO_CBOR> method encodes rational numbers with denominator +C<1> via their numerator only, i.e., they become normal integers or +C<bignums>. + +See L<BIGNUM SECURITY CONSIDERATIONS> for more info. =item 21, 22, 23 (expected later JSON conversion) @@ -910,48 +963,6 @@ C<URI::TO_CBOR> method again results in a CBOR URI value. =cut -our %FILTER = ( - # 0 # rfc4287 datetime, utf-8 - # 1 # unix timestamp, any - - 2 => sub { # pos bigint - require Math::BigInt; - Math::BigInt->new ("0x" . unpack "H*", pop) - }, - - 3 => sub { # neg bigint - require Math::BigInt; - -Math::BigInt->new ("0x" . unpack "H*", pop) - }, - - 4 => sub { # decimal fraction, array - require Math::BigFloat; - Math::BigFloat->new ($_[1][1] . "E" . $_[1][0]) - }, - - 5 => sub { # bigfloat, array - require Math::BigFloat; - scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2) - }, - - 21 => sub { pop }, # expected conversion to base64url encoding - 22 => sub { pop }, # expected conversion to base64 encoding - 23 => sub { pop }, # expected conversion to base16 encoding - - # 24 # embedded cbor, byte string - - 32 => sub { - require URI; - URI->new (pop) - }, - - # 33 # base64url rfc4648, utf-8 - # 34 # base64 rfc46484, utf-8 - # 35 # regex pcre/ecma262, utf-8 - # 36 # mime message rfc2045, utf-8 -); - - =head1 CBOR and JSON CBOR is supposed to implement a superset of the JSON data model, and is, @@ -1002,6 +1013,39 @@ structures in its error messages, so when you serialise sensitive information you might want to make sure that exceptions thrown by CBOR::XS will not end up in front of untrusted eyes. + +=head1 BIGNUM SECURITY CONSIDERATIONS + +CBOR::XS provides a C<TO_CBOR> method for both L<Math::BigInt> and +L<Math::BigFloat> that tries to encode the number in the simplest possible +way, that is, either a CBOR integer, a CBOR bigint/decimal fraction (tag +4) or an arbitrary-exponent decimal fraction (tag 264). Rational numbers +(L<Math::BigRat>, tag 30) can also contain bignums as members. + +CBOR::XS will also understand base-2 bigfloat or arbitrary-exponent +bigfloats (tags 5 and 265), but it will never generate these on its own. + +Using the built-in L<Math::BigInt::Calc> support, encoding and decoding +decimal fractions is generally fast. Decoding bigints can be slow for very +big numbers (tens of thousands of digits, something that could potentially +be caught by limiting the size of CBOR texts), and decoding bigfloats or +arbitrary-exponent bigfloats can be I<extremely> slow (minutes, decades) +for large exponents (roughly 40 bit and longer). + +Additionally, L<Math::BigInt> can take advantage of other bignum +libraries, such as L<Math::GMP>, which cannot handle big floats with large +exponents, and might simply abort or crash your program, due to their code +quality. + +This can be a concern if you want to parse untrusted CBOR. If it is, you +might want to disable decoding of tag 2 (bigint) and 3 (negative bigint) +types. You should also disable types 5 and 265, as these can be slow even +without bigints. + +Disabling bigints will also partially or fully disable types that rely on +them, e.g. rational numbers that use bignums. + + =head1 CBOR IMPLEMENTATION NOTES This section contains some random implementation notes. They do not @@ -1096,9 +1140,24 @@ our %FILTER = ( Math::BigFloat->new ($_[1][1] . "E" . $_[1][0]) }, + 264 => sub { # decimal fraction with arbitrary exponent + require Math::BigFloat; + Math::BigFloat->new ($_[1][1] . "E" . $_[1][0]) + }, + 5 => sub { # bigfloat, array require Math::BigFloat; - scalar Math::BigFloat->new ($_[1][1])->blsft ($_[1][0], 2) + scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0]) + }, + + 265 => sub { # bigfloat with arbitrary exponent + require Math::BigFloat; + scalar Math::BigFloat->new ($_[1][1]) * Math::BigFloat->new (2)->bpow ($_[1][0]) + }, + + 30 => sub { # rational number + require Math::BigRat; + Math::BigRat->new ("$_[1][0]/$_[1][1]") # separate parameters only work in recent versons }, 21 => sub { pop }, # expected conversion to base64url encoding @@ -1129,7 +1188,7 @@ sub URI::TO_CBOR { } sub Math::BigInt::TO_CBOR { - if ($_[0] >= -2147483648 && $_[0] <= 2147483647) { + if (-2147483648 <= $_[0] && $_[0] <= 2147483647) { $_[0]->numify } else { my $hex = substr $_[0]->as_hex, 2; @@ -1140,7 +1199,20 @@ sub Math::BigInt::TO_CBOR { sub Math::BigFloat::TO_CBOR { my ($m, $e) = $_[0]->parts; - tag 4, [$e->numify, $m] + + -9223372036854775808 <= $e && $e <= 18446744073709551615 + ? tag 4, [$e->numify, $m] + : tag 264, [$e, $m] +} + +sub Math::BigRat::TO_CBOR { + my ($n, $d) = $_[0]->parts; + + # older versions of BigRat need *1, as they not always return numbers + + $d*1 == 1 + ? $n*1 + : tag 30, [$n*1, $d*1] } sub Time::Piece::TO_CBOR { diff --git a/XS.xs b/XS.xs index 1611124..9e20259 100644 --- a/XS.xs +++ b/XS.xs @@ -102,7 +102,9 @@ enum cbor_tag #define F_ALLOW_SHARING 0x00000004UL #define F_ALLOW_CYCLES 0x00000008UL #define F_PACK_STRINGS 0x00000010UL -#define F_VALIDATE_UTF8 0x00000020UL +#define F_TEXT_KEYS 0x00000020UL +#define F_TEXT_STRINGS 0x00000040UL +#define F_VALIDATE_UTF8 0x00000080UL #define INIT_SIZE 32 // initial scalar size to be allocated @@ -278,17 +280,47 @@ encode_tag (enc_t *enc, UV tag) encode_uint (enc, MAJOR_TAG, tag); } +// exceptional (hopefully) slow path for byte strings that need to be utf8-encoded +ecb_noinline static void +encode_str_utf8 (enc_t *enc, int utf8, char *str, STRLEN len) +{ + STRLEN ulen = len; + U8 *p, *pend = (U8 *)str + len; + + for (p = (U8 *)str; p < pend; ++p) + ulen += *p >> 7; // count set high bits + + encode_uint (enc, MAJOR_TEXT, ulen); + + need (enc, ulen); + for (p = (U8 *)str; p < pend; ++p) + if (*p < 0x80) + *enc->cur++ = *p; + else + { + *enc->cur++ = 0xc0 + (*p >> 6); + *enc->cur++ = 0x80 + (*p & 63); + } +} + ecb_inline void -encode_str (enc_t *enc, int utf8, char *str, STRLEN len) +encode_str (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len) { + if (ecb_expect_false (upgrade_utf8)) + if (!utf8) + { + encode_str_utf8 (enc, utf8, str, len); + return; + } + encode_uint (enc, utf8 ? MAJOR_TEXT : MAJOR_BYTES, len); need (enc, len); memcpy (enc->cur, str, len); enc->cur += len; } -static void -encode_strref (enc_t *enc, int utf8, char *str, STRLEN len) +ecb_inline void +encode_strref (enc_t *enc, int upgrade_utf8, int utf8, char *str, STRLEN len) { if (ecb_expect_false (enc->cbor.flags & F_PACK_STRINGS)) { @@ -309,7 +341,7 @@ encode_strref (enc_t *enc, int utf8, char *str, STRLEN len) } } - encode_str (enc, utf8, str, len); + encode_str (enc, upgrade_utf8, utf8, str, len); } static void encode_sv (enc_t *enc, SV *sv); @@ -365,7 +397,7 @@ encode_hv (enc_t *enc, HV *hv) if (HeKLEN (he) == HEf_SVKEY) encode_sv (enc, HeSVKEY (he)); else - encode_strref (enc, HeKUTF8 (he), HeKEY (he), HeKLEN (he)); + encode_strref (enc, enc->cbor.flags & (F_TEXT_KEYS | F_TEXT_STRINGS), HeKUTF8 (he), HeKEY (he), HeKLEN (he)); encode_sv (enc, ecb_expect_false (mg) ? hv_iterval (hv, he) : HeVAL (he)); } @@ -493,7 +525,7 @@ encode_rv (enc_t *enc, SV *sv) encode_tag (enc, CBOR_TAG_PERL_OBJECT); encode_uint (enc, MAJOR_ARRAY, count + 1); - encode_strref (enc, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); + encode_strref (enc, 0, HvNAMEUTF8 (stash), HvNAME (stash), HvNAMELEN (stash)); while (count) encode_sv (enc, SP[1 - count--]); @@ -562,7 +594,7 @@ encode_sv (enc_t *enc, SV *sv) { STRLEN len; char *str = SvPV (sv, len); - encode_strref (enc, SvUTF8 (sv), str, len); + encode_strref (enc, enc->cbor.flags & F_TEXT_STRINGS, SvUTF8 (sv), str, len); } else if (SvNOKp (sv)) encode_nv (enc, sv); @@ -1392,6 +1424,8 @@ void shrink (CBOR *self, int enable = 1) allow_sharing = F_ALLOW_SHARING allow_cycles = F_ALLOW_CYCLES pack_strings = F_PACK_STRINGS + text_keys = F_TEXT_KEYS + text_strings = F_TEXT_STRINGS validate_utf8 = F_VALIDATE_UTF8 PPCODE: { @@ -1410,6 +1444,8 @@ void get_shrink (CBOR *self) get_allow_sharing = F_ALLOW_SHARING get_allow_cycles = F_ALLOW_CYCLES get_pack_strings = F_PACK_STRINGS + get_text_keys = F_TEXT_KEYS + get_text_strings = F_TEXT_STRINGS get_validate_utf8 = F_VALIDATE_UTF8 PPCODE: XPUSHs (boolSV (self->flags & ix)); diff --git a/t/53_bignum.t b/t/53_bignum.t index daaf36f..d31bc0e 100644 --- a/t/53_bignum.t +++ b/t/53_bignum.t @@ -1,8 +1,10 @@ -BEGIN { $| = 1; print "1..100\n"; } +BEGIN { $| = 1; print "1..105\n"; } BEGIN { $^W = 0 } # hate use CBOR::XS; +use Math::BigInt only => "FastCalc"; # needed for representation stability + print "ok 1\n"; my $t = decode_cbor pack "H*", "82c48221196ab3c5822003"; @@ -14,7 +16,7 @@ $t = encode_cbor $t; print $t eq (pack "H*", "82c48221196ab3c482200f") ? "" : "not ", "ok 4 # ", (unpack "H*", $t), "\n"; -# Math::BigFloat is loaded by now... +# Math::BigFloat must be loaded by now... for (5..99) { my $n = Math::BigFloat->new ((int rand 1e9) . "." . (int rand 1e9) . "e" . ((int rand 1e8) - 0.5e8)); @@ -26,5 +28,22 @@ for (5..99) { print $n != $m ? "not " : "ok $_ # $n eq $m\n"; } -print "ok 100\n"; +$t = encode_cbor CBOR::XS::tag 264, [Math::BigInt->new ("99999999999999999998"), Math::BigInt->new ("799999999999999999998")]; +$t = decode_cbor $t; +print "799999999999999999998e+99999999999999999998" eq $t->bsstr ? "" : "not ", "ok 100\n"; + +$t = encode_cbor $t; +print "d9010882c249056bc75e2d63100000c2492b5e3af16b187ffffe" eq (unpack "H*", $t) ? "" : "not ", "ok 101\n"; + +$t = encode_cbor CBOR::XS::tag 30, [4, 2]; +$t = decode_cbor $t; +print $t eq 2 ? "" : "not ", "ok 102 # $t\n"; + +$t = encode_cbor $t; +print "02" eq (unpack "H*", $t) ? "" : "not ", "ok 103\n"; + +$t = encode_cbor decode_cbor encode_cbor CBOR::XS::tag 30, [Math::BigInt->new (5), 2]; +print "d81e820502" eq (unpack "H*", $t) ? "" : "not ", "ok 104\n"; + +print "ok 105\n"; diff --git a/t/58_hv.t b/t/58_hv.t new file mode 100644 index 0000000..1fd44c0 --- /dev/null +++ b/t/58_hv.t @@ -0,0 +1,70 @@ +BEGIN { $| = 1; print "1..21\n"; } + +# none of the other tests serialise hv's, gross +# also checks text_keys/text_strings + +use CBOR::XS; + +print "ok 1\n"; + +$enc = encode_cbor {}; +print $enc ne "\xa0" ? "not " : "", "ok 2\n"; + +$enc = encode_cbor { 5 => 6 }; +print $enc ne (pack "H*", "a1413506") ? "not " : "", "ok 3\n"; + +$enc = encode_cbor { "" => \my $dummy }; +print $enc ne (pack "H*", "a140d95652f6") ? "not " : "", "ok 4\n"; + +$enc = encode_cbor { undef() => \my $dummy }; +print $enc ne (pack "H*", "a140d95652f6") ? "not " : "", "ok 5\n"; + +$enc = encode_cbor { "abc" => "def" }; +print $enc ne (pack "H*", "a14361626343646566") ? "not " : "", "ok 6\n"; + +$enc = encode_cbor { "abc" => "def", "geh" => "ijk" }; +print $enc !~ /^\xa2/ ? "not " : "", "ok 7\n"; +print 17 ne length $enc ? "not " : "", "ok 8\n"; + +$enc = encode_cbor { "\x{7f}" => undef }; +print $enc ne (pack "H*", "a1417ff6") ? "not " : "", "ok 9\n"; + +$dec = decode_cbor pack "H*", "a1417ff6"; +print +(keys %$dec)[0] ne "\x{7f}" ? "not " : "", "ok 10\n"; + +$enc = encode_cbor { "\x{100}" => undef }; +print $enc ne (pack "H*", "a162c480f6") ? "not " : "", "ok 11\n"; + +$dec = decode_cbor pack "H*", "a162c480f6"; +print +(keys %$dec)[0] ne "\x{100}" ? "not " : "", "ok 12\n"; + +$enc = encode_cbor { "\x{8f}" => undef }; +print $enc ne (pack "H*", "a1418ff6") ? "not " : "", "ok 13\n"; + +$text_strings = CBOR::XS->new->text_strings; + +$enc = $text_strings->encode ({ "\x{7f}" => "\x{3f}" }); +print $enc ne (pack "H*", "a1617f613f") ? "not " : "", "ok 14\n"; + +$enc = $text_strings->encode ({ "\x{8f}" => "\x{c7}" }); +print $enc ne (pack "H*", "a162c28f62c387") ? "not " : "", "ok 15\n"; + +$enc = $text_strings->encode ({ "\x{8f}gix\x{ff}x" => "a\x{80}b\x{fe}y" }); +print $enc ne (pack "H*", "a168c28f676978c3bf786761c28062c3be79") ? "not " : "", "ok 16\n"; + +$dec = decode_cbor pack "H*", "a168c28f676978c3bf78f6"; +print +(keys %$dec)[0] ne "\x{8f}gix\x{ff}x" ? "not " : "", "ok 17\n"; + +$text_keys = CBOR::XS->new->text_keys; + +$enc = $text_keys->encode ({ "\x{7f}" => "\x{3f}" }); +print $enc ne (pack "H*", "a1617f413f") ? "not " : "", "ok 18\n"; + +$enc = $text_keys->encode ({ "\x{8f}" => "\x{c7}" }); +print $enc ne (pack "H*", "a162c28f41c7") ? "not " : "", "ok 19\n"; + +$enc = $text_keys->encode ({ "\x{8f}gix\x{ff}x" => "a\x{80}b\x{fe}y" }); +print $enc ne (pack "H*", "a168c28f676978c3bf7845618062fe79") ? "not " : "", "ok 20\n"; + +print "ok 21\n"; + -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libcbor-xs-perl.git _______________________________________________ Pkg-perl-cvs-commits mailing list Pkg-perl-cvs-commits@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-perl-cvs-commits