In perl.git, the branch tonyc/127743-cperl-storable-fixes has been updated <https://perl5.git.perl.org/perl.git/commitdiff/176ea37321ef56459e9e2a94b6a4f064fe6cdec7?hp=09a6c358cbc9dce019836121e6b41cb2507f3cb4>
- Log ----------------------------------------------------------------- commit 176ea37321ef56459e9e2a94b6a4f064fe6cdec7 Author: Tony Cook <[email protected]> Date: Thu Nov 2 06:55:45 2017 +0100 WIP, fix for large object ids in STORABLE_freeze cases STORABLE_freeze() can return an optional set of references after the serialization, and these can be assigned large object ids if over 2**32 scalars have been output already, resulting in a corrupt result when the object is read back in. Unfortunately we don't know if we need large object ids until after we write the SX_HOOK header. commit d93ef57d7e5824a9064d3a13fff9b27ca2b60f0d Author: Tony Cook <[email protected]> Date: Wed Nov 1 23:58:49 2017 +0100 (perl #127663) update PERL_TEST_MEMORY requirements for the older tests PERL_TEST_MEMORY includes any swap, it's not the minimum resident set for the test. commit ab32093fc0e09d90f4908f727c6afe5a9cfd17ff Author: Tony Cook <[email protected]> Date: Wed Nov 1 23:31:39 2017 +0100 (perl #127663) fix two problems with large object ids Storable assigns an object id to every scalar it freezes, including to unused elements in arrays. There were two problems here: a) in retrieve(), object ids over 2**31-1 but less than 2**32 were treated as signed, so the wrong object was produced in the resulting data structure. Two changes we made to fix this: i) retrieve() now treats object ids in the problem range as unsigned, so data written by older Storables is now treated correctly. ii) store() now writes object ids in the problem range as 64-bit ids, so that older Storables will fail rather than producing an incorrect result data structure. b) once over 2**32 scalars had been output, the code still produced 32-bit object ids when referring to previous scalars. Fixed by adding support for 64-bit object ids. There's still an issue with object ids in hook produced data. Testing these changes requires ridiculous amounts of memory - ~32GB for a) and ~66GB for b), and the tests take a long time to run, hence for those tests to run you need the following in the environment; PERL_TEST_MEMORY >= 70 PERL_RUN_SLOW_TESTS != 0 ----------------------------------------------------------------------- Summary of changes: MANIFEST | 1 + dist/Storable/Storable.xs | 114 ++++++++++++++++++--- dist/Storable/t/huge.t | 6 +- dist/Storable/t/hugeids.t | 246 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 350 insertions(+), 17 deletions(-) create mode 100644 dist/Storable/t/hugeids.t diff --git a/MANIFEST b/MANIFEST index e9b5bf71ca..dde2bbea6f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -3689,6 +3689,7 @@ dist/Storable/t/HAS_ATTACH.pm For auto-requiring of modules for STORABLE_attach dist/Storable/t/HAS_HOOK.pm For auto-requiring of modules for STORABLE_thaw dist/Storable/t/HAS_OVERLOAD.pm For auto-requiring of mdoules for overload dist/Storable/t/huge.t See how Storable handles huge data +dist/Storable/t/hugeids.t See how Storable handles huge object ids dist/Storable/t/integer.t See if Storable works dist/Storable/t/interwork56.t Test compatibility kludge for 64bit data under 5.6.x dist/Storable/t/just_plain_nasty.t See if Storable works diff --git a/dist/Storable/Storable.xs b/dist/Storable/Storable.xs index a365ea7829..a0c757ed32 100644 --- a/dist/Storable/Storable.xs +++ b/dist/Storable/Storable.xs @@ -246,6 +246,16 @@ struct extendable { typedef unsigned long stag_t; /* Used by pre-0.6 binary format */ +/* + * Make the tag type 64-bit on 64-bit platforms. + * + * If the tag number is low enough it's stored as a 32-bit value, but + * with very large arrays and hashes it's possible to go over 2**32 + * scalars. + */ + +typedef STRLEN ntag_t; + /* * The following "thread-safe" related defines were contributed by * Murray Nesbitt <[email protected]> and integrated by RAM, who @@ -532,6 +542,16 @@ static stcxt_t *Context_ptr = NULL; #define LOW_32BITS(x) ((I32) ((unsigned long) (x) & 0xffffffffUL)) #endif +/* + * PTR2TAG(x) + * + * Convert a pointer into an ntag_t. + */ + +#define PTR2TAG(x) ((ntag_t)(x)) + +#define TAG2PTR(x, type) ((y)(x)) + /* * oI, oS, oC * @@ -3435,6 +3455,9 @@ static int store_hook( int clone = cxt->optype & ST_CLONE; char mtype = '\0'; /* for blessed ref to tied structures */ unsigned char eflags = '\0'; /* used when object type is SHT_EXTRA */ +#ifdef HAS_U64 + int need_large_oids = 0; +#endif TRACEME(("store_hook, classname \"%s\", tagged #%d", HvNAME_get(pkg), (int)cxt->tagnum)); @@ -3682,6 +3705,10 @@ static int store_hook( ary[i] = tag; TRACEME(("listed object %d at 0x%" UVxf " is tag #%" UVuf, i-1, PTR2UV(xsv), PTR2UV(tag))); +#ifdef HAS_U64 + if ((U32)PTR2TAG(tag) != PTR2TAG(tag)) + need_large_oids = 1; +#endif } /* @@ -3781,12 +3808,24 @@ static int store_hook( /* * NOTA BENE, for 64-bit machines: the ary[i] below does not yield a * real pointer, rather a tag number, well under the 32-bit limit. + * Which is wrong... if we have more than 2**32 SVs we can get ids over + * the 32-bit limit. */ for (i = 1; i < count; i++) { - I32 tagval = htonl(LOW_32BITS(ary[i])); - WRITE_I32(tagval); - TRACEME(("object %d, tag #%d", i-1, ntohl(tagval))); +#ifdef HAS_U64_INCOMPLETE + if (need_large_oids) { + ntag_t tag = PTR2TAG(ary[i]); + W64LEN(tag); + TRACEME(("object %d, tag #%" UVdf, i-1, ntohl(tagval), (UV)tag)); + } + else +#endif + { + I32 tagval = htonl(LOW_32BITS(ary[i])); + WRITE_I32(tagval); + TRACEME(("object %d, tag #%d", i-1, ntohl(tagval))); + } } } @@ -4089,9 +4128,8 @@ static int store(pTHX_ stcxt_t *cxt, SV *sv) svh = hv_fetch(hseen, (char *) &sv, sizeof(sv), FALSE); #endif if (svh) { - I32 tagval; - - if (sv == &PL_sv_undef) { + ntag_t tagval; + if (sv == &PL_sv_undef) { /* We have seen PL_sv_undef before, but fake it as if we have not. @@ -4122,17 +4160,41 @@ static int store(pTHX_ stcxt_t *cxt, SV *sv) } #ifdef USE_PTR_TABLE - tagval = htonl(LOW_32BITS(((char *)svh)-1)); + tagval = PTR2TAG(((char *)svh)-1); #else - tagval = htonl(LOW_32BITS(*svh)); + tagval = PTR2TAG(*svh); #endif +#ifdef HAS_U64 - TRACEME(("object 0x%" UVxf " seen as #%d", PTR2UV(sv), - ntohl(tagval))); + /* older versions of Storable streat the tag as a signed value + used in an array lookup, corrupting the data structure. + Ensure only a newer Storable will be able to parse this tag id + if it's over the 2G mark. + */ + if (tagval > I32_MAX) { - PUTMARK(SX_OBJECT); - WRITE_I32(tagval); - return 0; + TRACEME(("object 0x%" UVxf " seen as #%" UVdf, PTR2UV(sv), + tagval)); + + PUTMARK(SX_LOBJECT); + PUTMARK(SX_OBJECT); + W64LEN(tagval); + return 0; + } + else +#endif + { + I32 ltagval; + + ltagval = htonl((I32)tagval) + + TRACEME(("object 0x%" UVxf " seen as #%d", PTR2UV(sv), + ntohl(ltagval))); + + PUTMARK(SX_OBJECT); + WRITE_I32(ltagval); + return 0; + } } /* @@ -5648,6 +5710,18 @@ static SV *retrieve_lobject(pTHX_ stcxt_t *cxt, const char *cname) #endif TRACEME(("wlen %" UVuf, len)); switch (type) { + case SX_OBJECT: + { + /* not a large object, just a large index */ + SV **svh = av_fetch(cxt->aseen, len, FALSE); + if (!svh) + CROAK(("Object #%" UVuf " should have been retrieved already", + len)); + sv = *svh; + TRACEME(("had retrieved #%" UVuf " at 0x%" UVxf, len, PTR2UV(sv))); + SvREFCNT_inc(sv); + } + break; case SX_LSCALAR: sv = get_lstring(aTHX_ cxt, len, 0, cname); break; @@ -6844,7 +6918,19 @@ static SV *retrieve(pTHX_ stcxt_t *cxt, const char *cname) I32 tag; READ_I32(tag); tag = ntohl(tag); - svh = av_fetch(cxt->aseen, tag, FALSE); +#ifndef HAS_U64 + /* A 32-bit system can't have over 2**31 objects anyway */ + if (tag < 0) + CROAK(("Object #%" IVdf " out of range", (IV)tag); +#endif + /* Older versions of Storable on with 64-bit support on 64-bit + systems can produce values above the 2G boundary (or wrapped above + the 4G boundary, which we can't do much about), treat those as + unsigned. + This same commit stores tag ids over the 2G boundary as long tags + since older Storables will mis-handle them as short tags. + */ + svh = av_fetch(cxt->aseen, (U32)tag, FALSE); if (!svh) CROAK(("Object #%" IVdf " should have been retrieved already", (IV) tag)); diff --git a/dist/Storable/t/huge.t b/dist/Storable/t/huge.t index 3e36d9be63..55d6f84993 100644 --- a/dist/Storable/t/huge.t +++ b/dist/Storable/t/huge.t @@ -58,7 +58,7 @@ my @cases = ( # virtual memory. On darwin it is evtl killed. if ($Config{ptrsize} > 4 and !$has_too_many) { # needs 20-55G virtual memory, 4.6M heap and several minutes on a fast machine - if ($ENV{PERL_TEST_MEMORY} >= 8) { + if ($ENV{PERL_TEST_MEMORY} >= 55) { push @cases, [ 'huge array', sub { my @x; $x[$huge] = undef; \@x } ]; @@ -72,7 +72,7 @@ if ($Config{ptrsize} > 4 and !$has_too_many) { # Unfortunately even older 32bit perls do allow this. if (!$has_too_many) { # needs >90G virtual mem, and is evtl. killed - if ($ENV{PERL_TEST_MEMORY} >= 16) { + if ($ENV{PERL_TEST_MEMORY} >= 96) { # number of keys >I32. impossible to handle with perl5, but Storable can. push @cases, ['huge hash', @@ -83,7 +83,7 @@ if (!$has_too_many) { } -plan tests => 2 * scalar @cases; +plan tests => 2 + 2 * scalar @cases; for (@cases) { my ($desc, $build) = @$_; diff --git a/dist/Storable/t/hugeids.t b/dist/Storable/t/hugeids.t new file mode 100644 index 0000000000..c82757ce84 --- /dev/null +++ b/dist/Storable/t/hugeids.t @@ -0,0 +1,246 @@ +#!./perl + +# We do all of the work in child processes here to ensure that any +# memory used is released immediately. + +# These tests use ridiculous amounts of memory and CPU. + +use strict; +use warnings; + +use Config; +use Storable qw(store_fd retrieve_fd); +use Test::More; +use File::Temp qw(tempfile); +use Devel::Peek; + +BEGIN { + plan skip_all => 'Storable was not built' + if $ENV{PERL_CORE} && $Config{'extensions'} !~ /\b Storable \b/x; + plan skip_all => 'Need 64-bit pointers for this test' + if $Config{ptrsize} < 8 and $] > 5.013; + plan skip_all => 'Need 64-bit int for this test on older versions' + if $Config{uvsize} < 8 and $] < 5.013; + plan skip_all => 'Need ~34 GiB memory for this test, set PERL_TEST_MEMORY >= 34' + if !$ENV{PERL_TEST_MEMORY} || $ENV{PERL_TEST_MEMORY} < 34; + plan skip_all => 'These tests are slow, set PERL_RUN_SLOW_TESTS' + unless $ENV{PERL_RUN_SLOW_TESTS}; + plan skip_all => "Need fork for this test", + unless $Config{d_fork}; +} + +plan tests => 6; + +my $skips = $ENV{PERL_STORABLE_SKIP_ID_TEST} || ''; + +freeze_thaw_test + ( + name => "object ids between 2G and 4G", + freeze => \&make_2g_data, + thaw => \&check_2g_data, + id => "2g", + memory => 34, + ); + +freeze_thaw_test + ( + name => "object ids over 4G", + freeze => \&make_4g_data, + thaw => \&check_4g_data, + id => "4g", + memory => 70, + ); + +freeze_thaw_test + ( + name => "hook object ids over 4G", + freeze => \&make_hook_data, + thaw => \&check_hook_data, + id => "hook4g", + memory => 70, + todo_thaw => "no 64-bit oids in SX_hook yet", + ); + +sub freeze_thaw_test { + my %opts = @_; + + my $freeze = $opts{freeze} + or die "Missing freeze"; + my $thaw = $opts{thaw} + or die "Missing thaw"; + my $id = $opts{id} + or die "Missing id"; + my $name = $opts{name} + or die "Missing name"; + my $memory = $opts{memory} + or die "Missing memory"; + my $todo_thaw = $opts{todo_thaw} || ""; + + SKIP: + { + # IPC::Run would be handy here + + $ENV{PERL_TEST_MEMORY} >= $memory + or skip "Not enough memory to test $name", 2; + $skips =~ /\b\Q$id\E\b/ + and skip "You requested test $name be skipped", 2; + my $stored; + if (defined(my $pid = open(my $fh, "-|"))) { + unless ($pid) { + # child + open my $cfh, "|-", "gzip" + or die "Cannot pipe to gzip: $!"; + binmode $cfh; + $freeze->($cfh); + exit; + } + # parent + $stored = do { local $/; <$fh> }; + close $fh; + } + else { + skip "$name: Cannot fork for freeze", 2; + } + ok($stored, "$name: we got output data") + or skip "$name: skipping thaw test", 1; + + my ($tfh, $tname) = tempfile(); + + #my $tname = "$id.store.gz"; + #open my $tfh, ">", $tname or die; + #binmode $tfh; + + print $tfh $stored; + close $tfh; + + if (defined(my $pid = open(my $fh, "-|"))) { + unless ($pid) { + # child + open my $bfh, "-|", "gunzip <$tname" + or die "Cannot pipe from gunzip: $!"; + binmode $bfh; + $thaw->($bfh); + exit; + } + my $out = do { local $/; <$fh> }; + chomp $out; + local $TODO = $todo_thaw; + is($out, "OK", "$name: check result"); + } + else { + skip "$name: Cannot fork for thaw", 1; + } + } +} + + +sub make_2g_data { + my ($fh) = @_; + my @x; + my $y = 1; + my $z = 2; + my $g2 = 0x80000000; + $x[0] = \$y; + $x[$g2] = \$y; + $x[$g2+1] = \$z; + $x[$g2+2] = \$z; + store_fd(\@x, $fh); +} + +sub check_2g_data { + my ($fh) = @_; + my $x = retrieve_fd($fh); + my $g2 = 0x80000000; + $x->[0] == $x->[$g2] + or die "First entry mismatch"; + $x->[$g2+1] == $x->[$g2+2] + or die "2G+ entry mismatch"; + print "OK"; +} + +sub make_4g_data { + my ($fh) = @_; + my @x; + my $y = 1; + my $z = 2; + my $g4 = 2*0x80000000; + $x[0] = \$y; + $x[$g4] = \$y; + $x[$g4+1] = \$z; + $x[$g4+2] = \$z; + store_fd(\@x, $fh); +} + +sub check_4g_data { + my ($fh) = @_; + my $x = retrieve_fd($fh); + my $g4 = 2*0x80000000; + $x->[0] == $x->[$g4] + or die "First entry mismatch"; + $x->[$g4+1] == $x->[$g4+2] + or die "4G+ entry mismatch"; + ${$x->[$g4+1]} == 2 + or die "Incorrect value in 4G+ entry"; + print "OK"; +} + +sub make_hook_data { + my ($fh) = @_; + my @x; + my $y = HookLargeIds->new(101, { name => "one" }); + my $z = HookLargeIds->new(201, { name => "two" }); + my $g4 = 2*0x8000_0000; + $x[0] = $y; + $x[$g4] = $y; + $x[$g4+1] = $z; + $x[$g4+2] = $z; + store_fd(\@x, $fh); +} + +sub check_hook_data { + my ($fh) = @_; + my $x = retrieve_fd($fh); + my $g4 = 2*0x8000_0000; + my $y = $x->[$g4+1]; + $y = $x->[$g4+1]; + $y->id == 201 + or die "Incorrect id in 4G+ object"; + ref($y->data) eq 'HASH' + or die "data isn't a ref"; + $y->data->{name} eq "two" + or die "data name not 'one'"; + print "OK"; +} + +package HookLargeIds; + +sub new { + my $class = shift; + my ($id, $data) = @_; + return bless { id => $id, data => $data }, $class; +} + +sub STORABLE_freeze { + #print STDERR "freeze called\n"; + #Devel::Peek::Dump($_[0]); + + return $_[0]->id, $_[0]->data; +} + +sub STORABLE_thaw { + my ($self, $cloning, $ser, $data) = @_; + + #Devel::Peek::Dump(\@_); + #print STDERR "thaw called\n"; + #Devel::Peek::Dump($self); + $self->{id} = $ser+0; + $self->{data} = $data; +} + +sub id { + $_[0]{id}; +} + +sub data { + $_[0]{data}; +} -- Perl5 Master Repository
