Re: [sqlite] concurrent insert? (again - last mail sent by accident)

Daniel Franke Mon, 02 Aug 2004 03:13:11 -0700

Am Sonntag, 1. August 2004 17:47 schrieb D. Richard Hipp:
> Why don't you post your complete schema (the output of the ".schema"
> command in the command-line shell) and the query that you are
> running.  That will help us to better understand your problem.


I assumed I did something really stupid that anyone would spot but myself.
Here is the schema (formatted to increase readability):

-- snip --
SQLite version 2.8.15
sqlite> .schema
CREATE TABLE haploshare (id1 integer, id2 integer, locus integer,
                         p1p2 integer, p1m2 integer, m1p2 integer,
                         m1m2 integer,
                         FOREIGN KEY (id1) REFERENCES individuals(uniqueid),
                         FOREIGN KEY (id2) REFERENCES individuals(uniqueid),
                         FOREIGN KEY (locus) REFERENCES loci(locus));

CREATE TABLE haplotypes (individual integer,
                         locus integer, paternal varchar(8),
                         maternal varchar(8),
                         FOREIGN KEY (individual) REFERENCES individuals(uniqueid),
                         FOREIGN KEY (locus) REFERENCES loci(locus));

CREATE TABLE individuals (uniqueid integer primary key,
                          pedigree varchar(20),
                          individual varchar(20),
                          father varchar(20), mother varchar(20),
                          gender varchar(2));

CREATE TABLE loci (locus integer primary key, type integer, name
 varchar(20));

CREATE TABLE matched_pairs (id1 integer, id2 integer,
                            locus integer,
                            p1 varchar(8), m1 varchar(8),
                            p2 varchar(8), m2 varchar(8),
                            FOREIGN KEY (id1) REFERENCES individuals(uniqueid),
                            FOREIGN KEY (id2) REFERENCES individuals(uniqueid),
                            FOREIGNKEY (locus) REFERENCES loci(locus));

CREATE TABLE traits (individual integer, locus integer,
                     value double,
                     FOREIGN KEY (individual) REFERENCES individuals(uniqueid),
                     FOREIGN KEY (locus) REFERENCES loci(locus));

CREATE TABLE working_traits(individual,value);

CREATE INDEX idx_haploshare_id1 ON haploshare (id1);
CREATE INDEX idx_haploshare_id2 ON haploshare (id2);
CREATE INDEX idx_haploshare_idpairs ON haploshare (id1, id2);
CREATE UNIQUE INDEX idx_haploshare_keys ON haploshare (id1, id2, locus);
CREATE INDEX idx_haploshare_m1m2 ON haploshare (m1m2);
CREATE INDEX idx_haploshare_m1p2 ON haploshare (m1p2);
CREATE INDEX idx_haploshare_p1m2 ON haploshare (p1m2);
CREATE INDEX idx_haploshare_p1p2 ON haploshare (p1p2);
CREATE UNIQUE INDEX idx_haplotype ON haplotypes (individual, locus);
CREATE INDEX idx_results_locus ON results(locus);
CREATE INDEX idx_results_sim ON results(simulated);
CREATE UNIQUE INDEX idx_working_trait_id ON working_traits(individual);
CREATE INDEX idx_haploshare_id1id2_p1m2 ON haploshare (id1, id2, p1m2)
CREATE INDEX idx_haploshare_id1id2_p1p2 ON haploshare (id1, id2, p1p2)
CREATE INDEX idx_haploshare_id1id2_m1m2 ON haploshare (id1, id2, m1m2)
CREATE INDEX idx_haploshare_id1id2_m1p2 ON haploshare (id1, id2, m1p2)

CREATE TRIGGER haplo_share_of_m1m2 BEFORE INSERT ON haploshare
WHEN new.m1m2 == 0 OR new.m1m2 IS NULL
BEGIN
        UPDATE haploshare
        SET m1m2 = (SELECT count(*) FROM haploshare
                    WHERE m1m2 == -1 AND id1 == new.id1 AND id2 == new.id2) - 1
        WHERE m1m2 == -1 AND id1 == new.id1 AND id2 == new.id2;
END;

CREATE TRIGGER haplo_share_of_m1p2 BEFORE INSERT ON haploshare
WHEN new.m1p2 == 0 OR new.m1p2 IS NULL
BEGIN
        UPDATE haploshare
        SET m1p2 = (SELECT count(*) FROM haploshare
                    WHERE m1p2 == -1 AND id1 == new.id1 AND id2 == new.id2) - 1
        WHERE m1p2 == -1 AND id1 == new.id1 AND id2 == new.id2;
END;

CREATE TRIGGER haplo_share_of_p1m2 BEFORE INSERT ON haploshare
WHEN new.p1m2 == 0 OR new.p1m2 IS NULL
BEGIN
        UPDATE haploshare
        SET p1m2 = (SELECT count(*) FROM haploshare
                    WHERE p1m2 == -1 AND id1 == new.id1 AND id2 == new.id2) - 1
        WHERE p1m2 == -1 AND id1 == new.id1 AND id2 == new.id2;
END;

CREATE TRIGGER haplo_share_of_p1p2 BEFORE INSERT ON haploshare
WHEN new.p1p2 == 0 OR new.p1p2 IS NULL
BEGIN
        UPDATE haploshare
        SET p1p2 = (SELECT count(*) FROM haploshare
                    WHERE p1p2 == -1 AND id1 == new.id1 AND id2 == new.id2) - 1
        WHERE p1p2 == -1 AND id1 == new.id1 AND id2 == new.id2;
END;
-- snap --


At first, I need all usefull/possible pairs of id1/id2/locus, I achieve this
by:
--
INSERT INTO matched_pairs
        SELECT id1.uniqueid, id2.uniqueid, hap1.locus, hap1.paternal,
               hap1.maternal, hap2.paternal, hap2.maternal
        FROM individuals id1
        LEFT JOIN individuals id2
                ON id1.uniqueid < id2.uniqueid AND id1.pedigree == id2.pedigree
        LEFT JOIN haplotypes hap1
                ON id1.uniqueid == hap1.individual
        LEFT JOIN haplotypes hap2
                ON id2.uniqueid == hap2.individual AND hap1.locus == hap2.locus
        WHERE id2.uniqueid IS NOT NULL
        ORDER BY id1.uniqueid, id2.uniqueid, hap1.locus;
--
Creating these pairs is quite a fast operation, less than 60 seconds for the
larger tables.


The second step is to compute the sharings, that is the number of loci which
are equivalent in a range of loci. Here are a few lines from matched_pairs:

id1|id2|locus|p1|m1|p2|m2
 1 | 2 | 2   | 1| 2| 2| 2
 1 | 2 | 3   | 2| 1| 1| 1
 1 | 2 | 4   | 1| 1| 2| 1
 1 | 2 | 5   | 1| 2| 2| 2
 1 | 2 | 6   | 2| 2| 2| 2
 1 | 2 | 7   | 2| 2| 2| 2

The corresponding rows in haploshare look like:

id1|id2|locus|p1p2|p1m2|m1p2|m1m2
 1 | 2 | 2   |  0 |  0 |  2 |  6
 1 | 2 | 2   |  0 |  0 |  2 |  6
 1 | 2 | 2   |  0 |  0 |  0 |  6
 1 | 2 | 2   |  0 |  0 |  3 |  6
 1 | 2 | 2   |  2 |  2 |  3 |  6
 1 | 2 | 2   |  2 |  2 |  3 |  6


The statement to fill in the haploshare table:
--
INSERT INTO haploshare
        SELECT id1, id2, locus
                CASE WHEN p1 == 0 OR p2 == 0 THEN NULL WHEN p1 != p2 THEN 0 ELSE -1 
END,
                CASE WHEN p1 == 0 OR m2 == 0 THEN NULL WHEN p1 != m2 THEN 0 ELSE -1 
END,
                CASE WHEN m1 == 0 OR p2 == 0 THEN NULL WHEN m1 != p2 THEN 0 ELSE -1 
END,
                CASE WHEN m1 == 0 OR m2 == 0 THEN NULL WHEN m1 != m2 THEN 0 ELSE -1 END
        FROM matched_pairs
        ORDER BY id1, id2, locus;
--

The CASE statements will fire the triggers defined before. 
The algorithm works - but takes ages :(

Thanks a lot for your time!

        Daniel


--
Dipl.-Math. (FH) Daniel Franke
Institut fuer Medizinische Biometrie und Statistik
Medizinische Universität zu Luebeck
Ratzeburger Allee 160, Haus 4
23538 Luebeck
Telefon: 0451-500-2786
Telefax: 0451-500-2999
[EMAIL PROTECTED]

Re: [sqlite] concurrent insert? (again - last mail sent by accident)

Reply via email to