On 5/7/08, Dennis Cote <[EMAIL PROTECTED]> wrote:
> Samuel Neff wrote:
>  > This query runs slow:
>  >
>  > SELECT id FROM data ORDER BY random();
>  >
>  > but this equivalent query runs very fast:
>  >
>  > SELECT id FROM (SELECT id, random() r FROM data) ORDER BY r;
>  >
>
>
> I couldn't see how these would be different so I fired up the explain
>  command. As I expected, these two produce identical code (except for the
>   integer id assigned to the ephemeral table used for the sort). I don't
>  think here will be any difference in speed between these two statements.
>
>  SQLite version 3.5.7
>  Enter ".help" for instructions
>  sqlite> create table t (id integer primary key, a text);
>  sqlite> .explain
>  sqlite> explain select id from t order by random();
>  addr  opcode         p1    p2    p3    p4             p5  comment
>  ----  -------------  ----  ----  ----  -------------  --  -------------
>  0     Trace          0     0     0     explain select id from t order by
>  random(
>  );  00
>  1     OpenEphemeral  1     3     0     keyinfo(1,BINARY)  00
>  2     Goto           0     26    0                    00
>  3     OpenRead       0     2     0                    00
>  4     SetNumColumns  0     0     0                    00
>  5     Rewind         0     14    0                    00
>  6     Rowid          0     1     0                    00
>  7     MakeRecord     1     1     2                    00
>  8     Function       0     0     3     random(-1)     00
>  9     Sequence       1     4     0                    00
>  10    Move           2     5     0                    00
>  11    MakeRecord     3     3     6                    00
>  12    IdxInsert      1     6     0                    00
>  13    Next           0     6     0                    00
>  14    Close          0     0     0                    00
>  15    OpenPseudo     2     0     0                    00
>  16    SetNumColumns  2     1     0                    00
>  17    Sort           1     24    0                    00
>  18    Column         1     2     2                    00
>  19    Integer        1     6     0                    00
>  20    Insert         2     2     6                    00
>  21    Column         2     0     1                    00
>  22    ResultRow      1     1     0                    00
>  23    Next           1     18    0                    00
>  24    Close          2     0     0                    00
>  25    Halt           0     0     0                    00
>  26    Transaction    0     0     0                    00
>  27    VerifyCookie   0     1     0                    00
>  28    TableLock      0     2     0     t              00
>  29    Goto           0     3     0                    00
>  sqlite> explain select id from (select id, random() r from t) order by r;
>  addr  opcode         p1    p2    p3    p4             p5  comment
>  ----  -------------  ----  ----  ----  -------------  --  -------------
>  0     Trace          0     0     0     explain select id from (select
>  id, random
>  () r from t) order by r;  00
>  1     OpenEphemeral  2     3     0     keyinfo(1,BINARY)  00
>  2     Goto           0     26    0                    00
>  3     OpenRead       1     2     0                    00
>  4     SetNumColumns  1     0     0                    00
>  5     Rewind         1     14    0                    00
>  6     Rowid          1     1     0                    00
>  7     MakeRecord     1     1     2                    00
>  8     Function       0     0     3     random(-1)     00
>  9     Sequence       2     4     0                    00
>  10    Move           2     5     0                    00
>  11    MakeRecord     3     3     6                    00
>  12    IdxInsert      2     6     0                    00
>  13    Next           1     6     0                    00
>  14    Close          1     0     0                    00
>  15    OpenPseudo     3     0     0                    00
>  16    SetNumColumns  3     1     0                    00
>  17    Sort           2     24    0                    00
>  18    Column         2     2     2                    00
>  19    Integer        1     6     0                    00
>  20    Insert         3     2     6                    00
>  21    Column         3     0     1                    00
>  22    ResultRow      1     1     0                    00
>  23    Next           2     18    0                    00
>  24    Close          3     0     0                    00
>  25    Halt           0     0     0                    00
>  26    Transaction    0     0     0                    00
>  27    VerifyCookie   0     1     0                    00
>  28    TableLock      0     2     0     t              00
>  29    Goto           0     3     0                    00
>  sqlite>
>

when in doubt, benchmark.

Dennis is correct. Samuel Neff's solution, which, I first thought was
really creative, is no better than the original try. On the other
hand, taking the data out of the db and shuffling them in memory is
much faster by far. Here is my result in Perl --

Benchmark: timing 20 iterations of PERL_SHUFFLE, SEL_SORT, SORT_SEL...
PERL_SHUFFLE:  1 wallclock secs ( 0.92 usr +  0.02 sys =  0.94 CPU) @
21.28/s (n=20)
  SEL_SORT:  4 wallclock secs ( 2.76 usr +  0.71 sys =  3.47 CPU) @
5.76/s (n=20)
  SORT_SEL:  3 wallclock secs ( 2.76 usr +  0.71 sys =  3.47 CPU) @
5.76/s (n=20)

SEL_SORT is the original query. SORT_SEL is Samuel Neff's variation.
PERL_SHUFFLE is taking the data out and using List::Util to shuffle it
in memory.

The database is as in the original post with 10,000 rows.


See code below


#!/usr/local/bin/perl

use strict;
use DBI;
use Benchmark;
use List::Util qw(shuffle);;

my $dbh = DBI->connect('dbi:SQLite:dbname=rnd_sort.sqlite','','');

#create();

timethese(20,
  {
    'SEL_SORT' => \&sel_sort,
    'SORT_SEL' => \&sort_sel,
    'PERL_SHUFFLE' => \&perl_shuffle,
  }
);

sub perl_shuffle {
  my $sth = $dbh->prepare(qq{
    SELECT id, a FROM t
  });
  $sth->execute;
  my @res;
  while (my ($id, $a) = $sth->fetchrow_array) {
    push @res, "$id, $a";
  }
  $sth->finish;
  my @shuffled = shuffle @res;
}

sub sel_sort {
  my $sth = $dbh->prepare(qq{
    SELECT id, a FROM t ORDER BY random()
  });
  $sth->execute;
  my @res;
  while (my ($id, $a) = $sth->fetchrow_array) {
    push @res, "$id, $a";
  }
  $sth->finish;
}

sub sort_sel {
  my $sth = $dbh->prepare(qq{
    SELECT id, a FROM (SELECT id, a, random() r FROM t) ORDER BY r
  });
  $sth->execute;
  my @res;
  while (my ($id, $a) = $sth->fetchrow_array) {
    push @res, "$id, $a";
  }
  $sth->finish;
}

# Create 10,000 records with random text
sub create {
  my $sth = $dbh->prepare(qq{
    CREATE TABLE t (id INTEGER PRIMARY KEY, a TEXT)
  });
  $sth->execute;

  $sth = $dbh->prepare(qq{
    INSERT INTO t (id, a) VALUES (?, ?)
  });

  for (0 .. 9999) {
    $sth->execute($_, rnd_str());
  }

  $sth->finish;
}

# Generate a random string of letters
sub rnd_str {my $s; for (0..rand(99)) {$s .= ('a'..'z')[rand(26)]}; $s}
_______________________________________________
sqlite-users mailing list
sqlite-users@sqlite.org
http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users

Reply via email to