On Thu, 24 Apr 2008 13:31:47 +0100, Tim Bunce <[EMAIL PROTECTED]> wrote:

Hi Tim,

> So I'd be happy to see an API like this:
> 
> SV *_concat_hash_sorted( HV *hv, char *kv_sep, char *pair_sep, SV
> *value_format, SV *sort_type)
> 

Attached is the latest draft of _concat_hash_sorted(). I cleaned up the
types, fixed a few bugs,  and formalized the test suite.


Also, while I was testing, I found what looks to be a memory leak in
neat_svpv:

        if (SvIOK(sv))
-            nsv = newSVpvf("%"IVdf, SvIVX(sv));
-       else nsv = newSVpvf("%"NVgf, SvNVX(sv));
+            nsv = sv_2mortal(newSVpvf("%"IVdf, SvIVX(sv)));
+       else nsv = sv_2mortal(newSVpvf("%"NVgf, SvNVX(sv)));


If there is anything you would like changed, let me know. I was not sure
about using strcat/strncat, so if you'd like, I will change those. 

Oh, And is there a way to attach a string to an SV w/o copying it?

-r
--- DBI-1.604/DBI.xs	2008-03-24 09:44:38.000000000 -0400
+++ DBI-1.604-concat_hash/DBI.xs	2008-04-28 14:57:57.000000000 -0400
@@ -209,6 +209,95 @@
     return buf;
 }
 
+static int
+_cmp_number (val1, val2)
+    const void *val1;
+    const void *val2;
+{
+    dTHX;
+    double first, second;
+    char **endptr = 0;
+    int old_err;
+
+    old_err = errno; /* needed ? */
+    errno = 0;
+    first = strtod(*(char **)val1, endptr);
+    if (0 != errno) {
+            croak(strerror(errno));
+    }
+    errno = 0;
+    second = strtod(*(char **)val2, endptr);
+    if (0 != errno) {
+            croak(strerror(errno));
+    }
+    errno = old_err;
+
+    if (first == second)
+        return 0;
+    else if (first > second)
+	return 1;
+    else 
+	return -1;
+}
+
+static int _cmp_str (val1, val2)
+    const void *val1;
+    const void *val2;
+{
+    dTHX;
+    return strcmp( *(char **)val1, *(char **)val2);
+}
+
+char **
+_sort_hash_keys (hash, sort_order, total_length)
+    HV *hash;
+    char sort_order;
+    STRLEN *total_length;
+{
+    dTHX;
+    I32 hv_len, key_len;
+    SV *look_num;
+    HE *entry;
+    char **keys;
+    void *sort;
+    unsigned int idx = 0;
+    STRLEN tot_len = 0;
+
+    hv_len = hv_iterinit(hash);
+    if (!hv_len)
+        return 0;
+
+    keys = malloc(sizeof(char *)*hv_len);
+    if (!keys)
+        croak("Unable to allocate memory");
+
+    while ((entry = hv_iternext(hash))) {
+        *(keys+(idx++)) = hv_iterkey(entry, &key_len);
+        tot_len += key_len;
+    }
+    if (0 != total_length)
+        *total_length = tot_len;
+
+    /* replace with function table */
+    if (sort_order < 0) {
+        look_num = sv_2mortal(newSVpv(keys[0],0));
+        if (looks_like_number(look_num))
+            sort = _cmp_number;
+        else
+            sort = _cmp_str;
+    } else if (0 == sort_order) {
+        sort = _cmp_str;
+    } else if (1 == sort_order) {
+        sort = _cmp_number;
+    } else {
+        croak("Unknown sort order %i", sort_order);
+    }
+    qsort(keys, hv_len, sizeof(char*), sort);
+    return keys;
+}
+
+
+
 /* handy for embedding into condition expression for debugging */
 /*
 static int warn1(char *s) { warn(s); return 1; }
@@ -374,8 +463,8 @@
 	}
 	/* we don't use SvPV here since we don't want to alter sv in _any_ way	*/
 	if (SvIOK(sv))
-	     nsv = newSVpvf("%"IVdf, SvIVX(sv));
-	else nsv = newSVpvf("%"NVgf, SvNVX(sv));
+	     nsv = sv_2mortal(newSVpvf("%"IVdf, SvIVX(sv)));
+	else nsv = sv_2mortal(newSVpvf("%"NVgf, SvNVX(sv)));
 	if (infosv)
 	    sv_catsv(nsv, infosv);
 	return SvPVX(nsv);
@@ -4236,6 +4325,102 @@
     RETVAL
 
 
+SV *
+_concat_hash_sorted (hash, kv_separator, pair_separator, value_format,sort_type)
+    HV *hash
+    SV *kv_separator
+    SV *pair_separator
+    SV *value_format
+    SV *sort_type
+
+    PREINIT:
+        I32 hv_len;
+        STRLEN kv_sep_len, pair_sep_len, hv_val_len, pos=0, total_len = 0;
+        char **keys;
+        char *joined, *kv_sep, *pair_sep, *hv_val;
+        unsigned int i = 0;
+        char sort;
+        SV **hash_svp;
+        SV *return_sv;
+        bool not_neat;
+    CODE:
+
+        kv_sep = SvPV(kv_separator, kv_sep_len);
+        pair_sep = SvPV(pair_separator, pair_sep_len);
+
+        if (SvGMAGICAL(value_format))
+            mg_get(value_format);
+        not_neat = SvTRUE(value_format);
+
+        sort = -1;
+        if (SvOK(sort_type)) {
+            sort = SvIV(sort_type);
+        }
+
+
+        keys = _sort_hash_keys(hash, sort, &total_len);
+        if (!keys) {
+            ST(0) = Nullsv;
+            return;
+	}
+        hv_len = hv_iterinit(hash);
+        /* total_len += Separators + quotes + term null */
+        total_len += kv_sep_len*hv_len + pair_sep_len*hv_len+2*hv_len+1;
+        joined = malloc(total_len*sizeof(char));
+
+        for (i=0; i<hv_len; ++i) {
+            if (i > 0) {
+                strcpy(joined+pos, pair_sep);
+                pos += pair_sep_len;
+            }
+            strcpy(joined+pos, keys[i]);
+            pos += strlen(keys[i]);
+
+            hash_svp = hv_fetch(hash, keys[i], strlen(keys[i]), 0);
+            if (hash_svp) {
+                strcpy(joined+pos, kv_sep);
+                pos += kv_sep_len;
+                if (not_neat) {
+                    if (!SvOK(*hash_svp)) {
+                        strcpy(joined+(pos), "''");
+                        pos += 2;
+                        continue;
+                    }
+                    hv_val = SvPV(*hash_svp, hv_val_len);
+                    total_len += hv_val_len;
+                    if (!(joined=realloc(joined, total_len)))
+                        croak("Unable to allocate memory");
+                    strcpy(joined+(pos++), "'");
+                    strcpy(joined+pos, hv_val);
+                    pos += hv_val_len;
+                    strcpy(joined+(pos++), "'");
+                } else {
+                    hv_val = neatsvpv(*hash_svp, 0);
+                    hv_val_len = strlen(hv_val);
+                    total_len += hv_val_len;
+                    if (!(joined=realloc(joined, total_len)))
+                        croak("Unable to allocate memory");
+                    strcpy(joined+pos, hv_val);
+                    pos += hv_val_len;
+                }
+            }
+        }
+
+        free(keys);
+	/* assert(pos+1 < total_len); */
+
+	return_sv = newSVpvn(joined, pos);
+	free(joined);
+        RETVAL=return_sv;
+
+    OUTPUT:
+        RETVAL
+
+
+
+
+
+
 MODULE = DBI   PACKAGE = DBI::var
 
 void
# Before `make install' is performed this script should be runnable with
# `make test'. After `make install' it should work as `perl CatHash.t'

#########################

# change 'tests => 1' to 'tests => last_test_to_print';

use Test::More tests => 36;
BEGIN { use_ok('DBI') };


# null and undefs -- segfaults?;
is (DBI::_concat_hash_sorted({ }, "=", ":", 0, undef), undef);
eval {DBI::_concat_hash_sorted(undef, "=", ":", 0, undef), undef};
like ($@, qr/hash is not a hash reference/); #XXX check this
is (DBI::_concat_hash_sorted({ }, undef, ":", 0, undef), undef);
is (DBI::_concat_hash_sorted({ }, "=", undef, 0, undef), undef);
is (DBI::_concat_hash_sorted({ }, "=", ":", undef, undef), undef);


# Simple segfault tests?
ok(DBI::_concat_hash_sorted({bob=>'two', fred=>'one' }, "="x12000, ":", 1, undef));
ok(DBI::_concat_hash_sorted({bob=>'two', fred=>'one' }, "=", ":"x12000, 1, undef));
ok(DBI::_concat_hash_sorted({map {$_=>undef}x100000}, "="x12000, ":", 1, undef));
ok(DBI::_concat_hash_sorted({map {$_=>undef}x100000}, "=", ":"x12000, 1, undef), 'test');
ok(DBI::_concat_hash_sorted({map {$_=>undef}x100000}, "="x12000, ":"x12000, 1, undef), 'test');


my $simple_hash = {
    bob=>"there",
    jack=>12,
     fred=>"there",
     norman=>"there",
     sam =>undef
};

my $simple_numeric = {
    1=>"there",
    2=>"there",
    3=>"there",
    32=>"there",
    16 => 'yo',
    07 => "buddy",
    49 => undef,
};

my $simple_mixed = {
    bob=>"there",
    jack=>12,
     fred=>"there",
     norman=>"there",
     sam =>undef,
    1=>"there",
    2=>"there",
    3=>"there",
    32=>"there",
    16 => 'yo',
    07 => "buddy",
	    49 => undef,
};

my $simple_float = {
    1.12 =>"there",
    3.1415926 =>"there",
    2.718281828 =>"there",
    32=>"there",
    1.6 => 'yo',
    0.78 => "buddy",
    49 => undef,
};

eval {
    DBI::_concat_hash_sorted($simple_hash, "=",,":",1,12);
};
like ($@, qr/Unknown sort order/, "Unknown sort order");



## Loopify and Add Neat


my %neats = ("Neat"=>0, "Not Neat"=> 0);
my %sort_types = (guess=>undef, numeric => 1, lexical=> 0);
my %hashes = (
    Numeric=>$simple_numeric, 
    "Simple Hash" => $simple_hash, 
    "Mixed Hash" => $simple_mixed,
    "Float Hash" => $simple_float
);

for $sort_type (keys %sort_types){
    for $neat (keys %neats) {
        for $hash(keys %hashes) {
            test_concat_hash($hash, $neat, $sort_type);
        }
    }
}

sub test_concat_hash {
    my ($hash, $neat, $sort_type) = @_;
    is (
        DBI::_concat_hash_sorted(
            $hashes{$hash}, "=", ":",$neats{$neat}, $sort_types{$sort_type}
        ),
        _concat_hash_sorted(
            $hashes{$hash} , "=", ":",$neats{$neat}, $sort_types{$sort_type}
        ),
        "$hash - $neat $sort_type"
    );
}

if (0) {
    eval {
        use Benchmark qw(:all);
        cmpthese(200_000, {
	    Perl => sub {_concat_hash_sorted($simple_hash, "=", ":",0,undef); },
	    C=> sub {DBI::_concat_hash_sorted($simple_hash, "=", ":",0,undef);}
        });

        print "\n";
        cmpthese(200_000, {
  	    NotNeat => sub {DBI::_concat_hash_sorted(
                $simple_hash, "=", ":",1,undef);
            },
	    Neat    => sub {DBI::_concat_hash_sorted(
                $simple_hash, "=", ":",0,undef);
            }
        });
    };

}
#CatHash::_concat_hash_values({ }, ":-",,"::",1,1);


sub _concat_hash_sorted {
    my ( $hash_ref, $kv_separator, $pair_separator, $value_format, $sort_type ) = @_;
    # $value_format: false=use neat(), true=dumb quotes
    # $sort_type: 0=lexical, 1=numeric, undef=try to guess

    $keys = _get_sorted_hash_keys($hash_ref, $sort_type);
    my $string = '';
    for my $key (@$keys) {
        $string .= $pair_separator if length $string > 0;
        my $value = $hash_ref->{$key};
        if ($value_format) {
            $value = (defined $value) ? "'$value'" : 'undef';
        }
        else {
            $value = DBI::neat($value,0);
        }
        $string .= $key . $kv_separator . $value;
    }
    return $string;
}

use Scalar::Util qw(looks_like_number);
sub _get_sorted_hash_keys {
    my ($hash_ref, $sort_type) = @_;
    if (not defined $sort_type) {
        my $first_key = (each %$hash_ref)[0];
        $sort_type = looks_like_number($first_key);
    }
    my @keys = keys %$hash_ref;
    no warnings 'numeric';
    my @keys = ($sort_type)
        ? sort {$a <=> $b} @keys
        : sort    @keys;
    return [EMAIL PROTECTED];
}



#########################

# Insert your test code below, the Test::More module is use()ed here so read
# its man page ( perldoc Test::More ) for help writing this test script.

Reply via email to