Hello!
I noticed that "optimized" variance2/variance3 function in DBD::SQLite::Cookbook
saves state in global variables.
I think this is incorrect (how `SELECT variance(a), variance(b) FROM t;` is
supposed to work? besides, they even don't reset state in new).
Patch that fixes this error (and few others typos) attached.
Index: DBD-SQLite-1.33/lib/DBD/SQLite/Cookbook.pod
===================================================================
--- DBD-SQLite-1.33.orig/lib/DBD/SQLite/Cookbook.pod	2011-09-06 03:25:18.000000000 +0400
+++ DBD-SQLite-1.33/lib/DBD/SQLite/Cookbook.pod	2011-09-06 03:55:47.000000000 +0400
@@ -42,7 +42,7 @@ adapted from an example implementation i
   
       my $sigma = 0;
       foreach my $v ( @$self ) {
-          $sigma += ($x - $mu)**2;
+          $sigma += ($v - $mu)**2;
       }
       $sigma = $sigma / ($n - 1);
   
@@ -66,41 +66,38 @@ expense of precision:
 
   package variance2;
   
-  my $sum   = 0;
-  my $count = 0;
-  my %hash;
-  
-  sub new { bless [], shift; }
+  sub new { bless {sum => 0, count=>0, hash=> {} }, shift; }
   
   sub step {
       my ( $self, $value ) = @_;
+      my $hash = $self->{hash};
   
       # by truncating and hashing, we can comsume many more data points
       $value = int($value); # change depending on need for precision
                             # use sprintf for arbitrary fp precision
-      if (defined $hash{$value}) {
-          $hash{$value}++;
+      if (exists $hash->{$value}) {
+          $hash->{$value}++;
       } else {
-          $hash{$value} = 1;
+          $hash->{$value} = 1;
       }
-      $sum += $value;
-      $count++;
+      $self->{sum} += $value;
+      $self->{count}++;
   }
   
   sub finalize {
       my $self = $_[0];
   
       # Variance is NULL unless there is more than one row
-      return undef unless $count > 1;
+      return undef unless $self->{count} > 1;
   
       # calculate avg
-      my $mu = $sum / $count;
+      my $mu = $self->{sum} / $self->{count};
   
       my $sigma = 0;
-      foreach my $h (keys %hash) {
-          $sigma += (($h - $mu)**2) * $hash{$h};
+      while (my ($h, $v) = each %{$self->{hash}}) {
+          $sigma += (($h - $mu)**2) * $v;
       }
-      $sigma = $sigma / ($count - 1);
+      $sigma = $sigma / ($self->{count} - 1);
   
       return $sigma;
   }
@@ -115,25 +112,21 @@ The function can then be used as:
 
 A third variable implementation, designed for arbitrarily large data sets:
 
-  package variance;
-  
-  my $mu = 0;
-  my $count = 0;
-  my $S = 0
+  package variance3;
   
-  sub new { bless [], shift; }
+  sub new { bless {mu=>0, count=>0, S=>0}, shift; }
   
   sub step {
       my ( $self, $value ) = @_;
-      $count++;
-      $delta = $value - $mu;
-      $mu = $mu + $delta/$count
-      $S = $S + $delta*($value - $mu);
+      $self->{count}++;
+      my $delta = $value - $self->{mu};
+      $self->{mu} += $delta/$self->{count};
+      $self->{S} += $delta*($value - $self->{mu});
   }
   
   sub finalize {
       my $self = $_[0];
-      return $S / ($count - 1);
+      return $self->{S} / ($self->{count} - 1);
   }
 
 The function can then be used as:
_______________________________________________
DBD-SQLite mailing list
DBD-SQLite@lists.scsys.co.uk
http://lists.scsys.co.uk/cgi-bin/mailman/listinfo/dbd-sqlite

Reply via email to