On Thu, 24 Apr 2008 13:31:47 +0100, Tim Bunce <[EMAIL PROTECTED]> wrote:
Hi Tim, > So I'd be happy to see an API like this: > > SV *_concat_hash_sorted( HV *hv, char *kv_sep, char *pair_sep, SV > *value_format, SV *sort_type) > Attached is the latest draft of _concat_hash_sorted(). I cleaned up the types, fixed a few bugs, and formalized the test suite. Also, while I was testing, I found what looks to be a memory leak in neat_svpv: if (SvIOK(sv)) - nsv = newSVpvf("%"IVdf, SvIVX(sv)); - else nsv = newSVpvf("%"NVgf, SvNVX(sv)); + nsv = sv_2mortal(newSVpvf("%"IVdf, SvIVX(sv))); + else nsv = sv_2mortal(newSVpvf("%"NVgf, SvNVX(sv))); If there is anything you would like changed, let me know. I was not sure about using strcat/strncat, so if you'd like, I will change those. Oh, And is there a way to attach a string to an SV w/o copying it? -r
--- DBI-1.604/DBI.xs 2008-03-24 09:44:38.000000000 -0400 +++ DBI-1.604-concat_hash/DBI.xs 2008-04-28 14:57:57.000000000 -0400 @@ -209,6 +209,95 @@ return buf; } +static int +_cmp_number (val1, val2) + const void *val1; + const void *val2; +{ + dTHX; + double first, second; + char **endptr = 0; + int old_err; + + old_err = errno; /* needed ? */ + errno = 0; + first = strtod(*(char **)val1, endptr); + if (0 != errno) { + croak(strerror(errno)); + } + errno = 0; + second = strtod(*(char **)val2, endptr); + if (0 != errno) { + croak(strerror(errno)); + } + errno = old_err; + + if (first == second) + return 0; + else if (first > second) + return 1; + else + return -1; +} + +static int _cmp_str (val1, val2) + const void *val1; + const void *val2; +{ + dTHX; + return strcmp( *(char **)val1, *(char **)val2); +} + +char ** +_sort_hash_keys (hash, sort_order, total_length) + HV *hash; + char sort_order; + STRLEN *total_length; +{ + dTHX; + I32 hv_len, key_len; + SV *look_num; + HE *entry; + char **keys; + void *sort; + unsigned int idx = 0; + STRLEN tot_len = 0; + + hv_len = hv_iterinit(hash); + if (!hv_len) + return 0; + + keys = malloc(sizeof(char *)*hv_len); + if (!keys) + croak("Unable to allocate memory"); + + while ((entry = hv_iternext(hash))) { + *(keys+(idx++)) = hv_iterkey(entry, &key_len); + tot_len += key_len; + } + if (0 != total_length) + *total_length = tot_len; + + /* replace with function table */ + if (sort_order < 0) { + look_num = sv_2mortal(newSVpv(keys[0],0)); + if (looks_like_number(look_num)) + sort = _cmp_number; + else + sort = _cmp_str; + } else if (0 == sort_order) { + sort = _cmp_str; + } else if (1 == sort_order) { + sort = _cmp_number; + } else { + croak("Unknown sort order %i", sort_order); + } + qsort(keys, hv_len, sizeof(char*), sort); + return keys; +} + + + /* handy for embedding into condition expression for debugging */ /* static int warn1(char *s) { warn(s); return 1; } @@ -374,8 +463,8 @@ } /* we don't use SvPV here since we don't want to alter sv in _any_ way */ if (SvIOK(sv)) - nsv = newSVpvf("%"IVdf, SvIVX(sv)); - else nsv = newSVpvf("%"NVgf, SvNVX(sv)); + nsv = sv_2mortal(newSVpvf("%"IVdf, SvIVX(sv))); + else nsv = sv_2mortal(newSVpvf("%"NVgf, SvNVX(sv))); if (infosv) sv_catsv(nsv, infosv); return SvPVX(nsv); @@ -4236,6 +4325,102 @@ RETVAL +SV * +_concat_hash_sorted (hash, kv_separator, pair_separator, value_format,sort_type) + HV *hash + SV *kv_separator + SV *pair_separator + SV *value_format + SV *sort_type + + PREINIT: + I32 hv_len; + STRLEN kv_sep_len, pair_sep_len, hv_val_len, pos=0, total_len = 0; + char **keys; + char *joined, *kv_sep, *pair_sep, *hv_val; + unsigned int i = 0; + char sort; + SV **hash_svp; + SV *return_sv; + bool not_neat; + CODE: + + kv_sep = SvPV(kv_separator, kv_sep_len); + pair_sep = SvPV(pair_separator, pair_sep_len); + + if (SvGMAGICAL(value_format)) + mg_get(value_format); + not_neat = SvTRUE(value_format); + + sort = -1; + if (SvOK(sort_type)) { + sort = SvIV(sort_type); + } + + + keys = _sort_hash_keys(hash, sort, &total_len); + if (!keys) { + ST(0) = Nullsv; + return; + } + hv_len = hv_iterinit(hash); + /* total_len += Separators + quotes + term null */ + total_len += kv_sep_len*hv_len + pair_sep_len*hv_len+2*hv_len+1; + joined = malloc(total_len*sizeof(char)); + + for (i=0; i<hv_len; ++i) { + if (i > 0) { + strcpy(joined+pos, pair_sep); + pos += pair_sep_len; + } + strcpy(joined+pos, keys[i]); + pos += strlen(keys[i]); + + hash_svp = hv_fetch(hash, keys[i], strlen(keys[i]), 0); + if (hash_svp) { + strcpy(joined+pos, kv_sep); + pos += kv_sep_len; + if (not_neat) { + if (!SvOK(*hash_svp)) { + strcpy(joined+(pos), "''"); + pos += 2; + continue; + } + hv_val = SvPV(*hash_svp, hv_val_len); + total_len += hv_val_len; + if (!(joined=realloc(joined, total_len))) + croak("Unable to allocate memory"); + strcpy(joined+(pos++), "'"); + strcpy(joined+pos, hv_val); + pos += hv_val_len; + strcpy(joined+(pos++), "'"); + } else { + hv_val = neatsvpv(*hash_svp, 0); + hv_val_len = strlen(hv_val); + total_len += hv_val_len; + if (!(joined=realloc(joined, total_len))) + croak("Unable to allocate memory"); + strcpy(joined+pos, hv_val); + pos += hv_val_len; + } + } + } + + free(keys); + /* assert(pos+1 < total_len); */ + + return_sv = newSVpvn(joined, pos); + free(joined); + RETVAL=return_sv; + + OUTPUT: + RETVAL + + + + + + MODULE = DBI PACKAGE = DBI::var void
# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl CatHash.t' ######################### # change 'tests => 1' to 'tests => last_test_to_print'; use Test::More tests => 36; BEGIN { use_ok('DBI') }; # null and undefs -- segfaults?; is (DBI::_concat_hash_sorted({ }, "=", ":", 0, undef), undef); eval {DBI::_concat_hash_sorted(undef, "=", ":", 0, undef), undef}; like ($@, qr/hash is not a hash reference/); #XXX check this is (DBI::_concat_hash_sorted({ }, undef, ":", 0, undef), undef); is (DBI::_concat_hash_sorted({ }, "=", undef, 0, undef), undef); is (DBI::_concat_hash_sorted({ }, "=", ":", undef, undef), undef); # Simple segfault tests? ok(DBI::_concat_hash_sorted({bob=>'two', fred=>'one' }, "="x12000, ":", 1, undef)); ok(DBI::_concat_hash_sorted({bob=>'two', fred=>'one' }, "=", ":"x12000, 1, undef)); ok(DBI::_concat_hash_sorted({map {$_=>undef}x100000}, "="x12000, ":", 1, undef)); ok(DBI::_concat_hash_sorted({map {$_=>undef}x100000}, "=", ":"x12000, 1, undef), 'test'); ok(DBI::_concat_hash_sorted({map {$_=>undef}x100000}, "="x12000, ":"x12000, 1, undef), 'test'); my $simple_hash = { bob=>"there", jack=>12, fred=>"there", norman=>"there", sam =>undef }; my $simple_numeric = { 1=>"there", 2=>"there", 3=>"there", 32=>"there", 16 => 'yo', 07 => "buddy", 49 => undef, }; my $simple_mixed = { bob=>"there", jack=>12, fred=>"there", norman=>"there", sam =>undef, 1=>"there", 2=>"there", 3=>"there", 32=>"there", 16 => 'yo', 07 => "buddy", 49 => undef, }; my $simple_float = { 1.12 =>"there", 3.1415926 =>"there", 2.718281828 =>"there", 32=>"there", 1.6 => 'yo', 0.78 => "buddy", 49 => undef, }; eval { DBI::_concat_hash_sorted($simple_hash, "=",,":",1,12); }; like ($@, qr/Unknown sort order/, "Unknown sort order"); ## Loopify and Add Neat my %neats = ("Neat"=>0, "Not Neat"=> 0); my %sort_types = (guess=>undef, numeric => 1, lexical=> 0); my %hashes = ( Numeric=>$simple_numeric, "Simple Hash" => $simple_hash, "Mixed Hash" => $simple_mixed, "Float Hash" => $simple_float ); for $sort_type (keys %sort_types){ for $neat (keys %neats) { for $hash(keys %hashes) { test_concat_hash($hash, $neat, $sort_type); } } } sub test_concat_hash { my ($hash, $neat, $sort_type) = @_; is ( DBI::_concat_hash_sorted( $hashes{$hash}, "=", ":",$neats{$neat}, $sort_types{$sort_type} ), _concat_hash_sorted( $hashes{$hash} , "=", ":",$neats{$neat}, $sort_types{$sort_type} ), "$hash - $neat $sort_type" ); } if (0) { eval { use Benchmark qw(:all); cmpthese(200_000, { Perl => sub {_concat_hash_sorted($simple_hash, "=", ":",0,undef); }, C=> sub {DBI::_concat_hash_sorted($simple_hash, "=", ":",0,undef);} }); print "\n"; cmpthese(200_000, { NotNeat => sub {DBI::_concat_hash_sorted( $simple_hash, "=", ":",1,undef); }, Neat => sub {DBI::_concat_hash_sorted( $simple_hash, "=", ":",0,undef); } }); }; } #CatHash::_concat_hash_values({ }, ":-",,"::",1,1); sub _concat_hash_sorted { my ( $hash_ref, $kv_separator, $pair_separator, $value_format, $sort_type ) = @_; # $value_format: false=use neat(), true=dumb quotes # $sort_type: 0=lexical, 1=numeric, undef=try to guess $keys = _get_sorted_hash_keys($hash_ref, $sort_type); my $string = ''; for my $key (@$keys) { $string .= $pair_separator if length $string > 0; my $value = $hash_ref->{$key}; if ($value_format) { $value = (defined $value) ? "'$value'" : 'undef'; } else { $value = DBI::neat($value,0); } $string .= $key . $kv_separator . $value; } return $string; } use Scalar::Util qw(looks_like_number); sub _get_sorted_hash_keys { my ($hash_ref, $sort_type) = @_; if (not defined $sort_type) { my $first_key = (each %$hash_ref)[0]; $sort_type = looks_like_number($first_key); } my @keys = keys %$hash_ref; no warnings 'numeric'; my @keys = ($sort_type) ? sort {$a <=> $b} @keys : sort @keys; return [EMAIL PROTECTED]; } ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script.