Re: [Biojava-l] BioSQL

Len Trigg Sun, 21 Nov 2004 13:18:01 -0800

Mark Schreiber wrote:
> Does anyone have a current BioSQL schema that matches the BioJava 
> bindings? Preferably one for Oracle.


AFAIK, current BioSQL CVS matches what BioJava expects -- Hilmar
recently added the extra table that BioJava uses.  I've also attached
the Oracle schema that I've used (it's a bit simpler than the full
BioSQL Oracle schema).

Cheers,
Len.

-- conventions:
-- <table_name>_id is primary internal id (usually autogenerated)

-- Authors: Ewan Birney, Elia Stupka
-- Contributors: Hilmar Lapp, Aaron Mackey
--
-- Copyright Ewan Birney. You may use, modify, and distribute this code under
-- the same terms as Perl. See the Perl Artistic License.
--
-- comments to biosql - [EMAIL PROTECTED]

--
-- Migration of the MySQL schema to InnoDB by Hilmar Lapp <hlapp at gmx.net>
-- Post-Cape Town changes by Hilmar Lapp.
-- Singapore changes by Hilmar Lapp and Aaron Mackey.
--


CREATE SEQUENCE biodatabase_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE taxon_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE ontology_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE term_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE term_relationship_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE term_path_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE bioentry_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE bioentry_relationship_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE dbxref_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE reference_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE anncomment_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE seqfeature_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE seqfeature_relationship_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;
CREATE SEQUENCE location_pk_seq
        INCREMENT BY 1 START WITH 1 NOMAXVALUE NOMINVALUE NOCYCLE NOORDER;


-- database have bioentries. That is about it.
-- we do not store different versions of a database as different dbids
-- (there is no concept of versions of database). There is a concept of
-- versions of entries. Versions of databases deserve their own table and
-- join to bioentry table for tracking with versions of entries 

CREATE TABLE biodatabase (
        biodatabase_id  int  NOT NULL ,
        name            VARCHAR(128) NOT NULL,
        authority       VARCHAR(128),
        description     VARCHAR2(250),
        PRIMARY KEY (biodatabase_id),
        UNIQUE (name)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX db_auth on biodatabase(authority) TABLESPACE "BIOSQL_INDEX";

-- we could insist that taxa are NCBI taxon id, but on reflection I made this
-- an optional extra line, as many flat file formats do not have the NCBI id
--
-- no organelle/sub species

-- corresponds to the node table of the NCBI taxonomy databaase
CREATE TABLE taxon (
       taxon_id         int  NOT NULL ,
       ncbi_taxon_id    int,
       parent_taxon_id  int ,
       node_rank        VARCHAR(32),
       genetic_code     INT ,
       mito_genetic_code INT ,
       left_value       int ,
       right_value      int ,
       PRIMARY KEY (taxon_id),
       UNIQUE (ncbi_taxon_id),
       UNIQUE (left_value),
       UNIQUE (right_value)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX taxparent ON taxon(parent_taxon_id) TABLESPACE "BIOSQL_INDEX";

-- corresponds to the names table of the NCBI taxonomy databaase
CREATE TABLE taxon_name (
       taxon_id         int  NOT NULL,
       name             VARCHAR(255) NOT NULL,
       name_class       VARCHAR(32) NOT NULL,
       UNIQUE (taxon_id,name,name_class)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX taxnametaxonid ON taxon_name(taxon_id) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX taxnamename    ON taxon_name(name) TABLESPACE "BIOSQL_INDEX";

-- this is the namespace (controlled vocabulary) ontology terms live in
-- we chose to have a separate table for this instead of reusing biodatabase
CREATE TABLE ontology (
        ontology_id        int  NOT NULL ,
        name               VARCHAR(32) NOT NULL,
        definition         VARCHAR2(250),
        PRIMARY KEY (ontology_id),
        UNIQUE (name)
) TABLESPACE "BIOSQL_DATA";

-- any controlled vocab term, everything from full ontology
-- terms eg GO IDs to the various keys allowed as qualifiers
CREATE TABLE term (
        term_id   int  NOT NULL ,
        name               VARCHAR(255) NOT NULL,
        definition         VARCHAR2(250),
        identifier         VARCHAR(40),
        is_obsolete        CHAR(1),
        ontology_id        int  NOT NULL,
        PRIMARY KEY (term_id),
        UNIQUE (name,ontology_id),
        UNIQUE (identifier)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX term_ont ON term(ontology_id) TABLESPACE "BIOSQL_INDEX";

-- ontology terms have synonyms, here is how to store them
CREATE TABLE term_synonym (
       name               VARCHAR(255) NOT NULL,
       term_id            int  NOT NULL,
       PRIMARY KEY (term_id,name)
) TABLESPACE "BIOSQL_DATA";

-- ontology terms to dbxref association: ontology terms have dbxrefs
CREATE TABLE term_dbxref (
        term_id           int  NOT NULL,
        dbxref_id         int  NOT NULL,
        rank              SMALLINT,
        PRIMARY KEY (term_id, dbxref_id)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX trmdbxref_dbxrefid ON term_dbxref(dbxref_id) TABLESPACE 
"BIOSQL_INDEX";

-- relationship between controlled vocabulary / ontology term
-- we use subject/predicate/object but this could also
-- be thought of as child/relationship-type/parent.
-- the subject/predicate/object naming is better as we
-- can think of the graph as composed of statements.
--
-- we also treat the relationshiptypes / predicates as
-- controlled terms in themselves; this is quite useful
-- as a lot of systems (eg GO) will soon require
-- ontologies of relationship types (eg subtle differences
-- in the partOf relationship)
--
-- this table probably won''t be filled for a while, the core
-- will just treat ontologies as flat lists of terms

CREATE TABLE term_relationship (
        term_relationship_id int  NOT NULL ,
        subject_term_id int  NOT NULL,
        predicate_term_id    int  NOT NULL,
        object_term_id       int  NOT NULL,
        ontology_id     int  NOT NULL,
        PRIMARY KEY (term_relationship_id),
        UNIQUE (subject_term_id,predicate_term_id,object_term_id,ontology_id)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX trmrel_predicateid ON term_relationship(predicate_term_id) 
TABLESPACE "BIOSQL_INDEX";
CREATE INDEX trmrel_objectid ON term_relationship(object_term_id) TABLESPACE 
"BIOSQL_INDEX";
CREATE INDEX trmrel_ontid ON term_relationship(ontology_id) TABLESPACE 
"BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX ontrel_subjectid ON term_relationship(subject_term_id);

-- the infamous transitive closure table on ontology term relationships
-- this is a warehouse approach - you will need to update this regularly
--
-- the triple of (subject, predicate, object) is the same as for ontology
-- relationships, with the exception of predicate being the greatest common
-- denominator of the relationships types visited in the path (i.e., if
-- relationship type A is-a relationship type B, the greatest common
-- denominator for path containing both types A and B is B)
--
-- See the GO database or Chado schema for other (and possibly better
-- documented) implementations of the transitive closure table approach.
CREATE TABLE term_path (
        term_path_id         int  NOT NULL ,
        subject_term_id      int  NOT NULL,
        predicate_term_id    int  NOT NULL,
        object_term_id       int  NOT NULL,
        ontology_id          int  NOT NULL,
        distance             int ,
        PRIMARY KEY (term_path_id),
        UNIQUE 
(subject_term_id,predicate_term_id,object_term_id,ontology_id,distance)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX trmpath_predicateid ON term_path(predicate_term_id) TABLESPACE 
"BIOSQL_INDEX";
CREATE INDEX trmpath_objectid ON term_path(object_term_id) TABLESPACE 
"BIOSQL_INDEX";
CREATE INDEX trmpath_ontid ON term_path(ontology_id) TABLESPACE "BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX trmpath_subjectid ON term_path(subject_term_id);


-- BioJava addition
CREATE TABLE term_relationship_term (
        term_relationship_id    int  DEFAULT 0 NOT NULL,
        term_id                 int  DEFAULT 0 NOT NULL,
        PRIMARY KEY (term_relationship_id,term_id),
) TABLESPACE "BIOSQL_DATA";

ALTER TABLE term_relationship_term ADD CONSTRAINT uni_term_relationship_id 
        UNIQUE (term_relationship_id) ENABLE VALIDATE;
ALTER TABLE term_relationship_term ADD CONSTRAINT uni_term_id 
        UNIQUE (term_id) ENABLE VALIDATE;


-- we can be a bioentry without a biosequence, but not visa-versa
-- most things are going to be keyed off bioentry_id
--
-- accession is the stable id, display_id is a potentially volatile,
-- human readable name.
--
-- Version may be unknown, may be undefined, or may not exist for a certain
-- accession or database (namespace). We require it here to avoid RDBMS-
-- dependend enforcement variants (version is in a compound alternative key),
-- and to simplify query construction for UK look-ups. If there is no version
-- the convention is to put 0 (zero) here. Likewise, a record with a version
-- of zero means the version is to be interpreted as NULL.
--
-- not all entries have a taxon, but many do.
-- one bioentry only has one taxon! (weirdo chimerias are not handled. tough)
--
-- Name maps to display_id in bioperl. We have a different column name
-- here to avoid confusion with the naming convention for foreign keys.

CREATE TABLE bioentry (
        bioentry_id     int  NOT NULL ,
        biodatabase_id  int  NOT NULL,
        taxon_id        int ,
        name            VARCHAR(40) NOT NULL,
        accession       VARCHAR(40) NOT NULL,
        identifier      VARCHAR(40),
        division        VARCHAR(6),
        description     VARCHAR2(250),
        version         SMALLINT  NOT NULL, 
        PRIMARY KEY (bioentry_id),
        UNIQUE (accession,biodatabase_id,version),
        UNIQUE (identifier)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX bioentry_name ON bioentry(name) TABLESPACE "BIOSQL_INDEX";
CREATE INDEX bioentry_db   ON bioentry(biodatabase_id) TABLESPACE 
"BIOSQL_INDEX";
CREATE INDEX bioentry_tax  ON bioentry(taxon_id) TABLESPACE "BIOSQL_INDEX";



--
-- bioentry-bioentry relationships: these are typed
--
CREATE TABLE bioentry_relationship (
        bioentry_relationship_id int  NOT NULL ,
        object_bioentry_id      int  NOT NULL,
        subject_bioentry_id     int  NOT NULL,
        term_id                 int  NOT NULL,
        rank                    INT,
        PRIMARY KEY (bioentry_relationship_id),
        UNIQUE (object_bioentry_id,subject_bioentry_id,term_id)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX bioentryrel_trm   ON bioentry_relationship(term_id) TABLESPACE 
"BIOSQL_INDEX";
CREATE INDEX bioentryrel_child ON bioentry_relationship(subject_bioentry_id) 
TABLESPACE "BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX bioentryrel_parent ON bioentry_relationship(object_bioentry_id);

-- for deep (depth > 1) bioentry relationship trees we need a transitive
-- closure table too
CREATE TABLE bioentry_path (
        object_bioentry_id      int  NOT NULL,
        subject_bioentry_id     int  NOT NULL,
        term_id                 int  NOT NULL,
        distance                int ,
        UNIQUE (object_bioentry_id,subject_bioentry_id,term_id,distance)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX bioentrypath_trm   ON bioentry_path(term_id) TABLESPACE 
"BIOSQL_INDEX";
CREATE INDEX bioentrypath_child ON bioentry_path(subject_bioentry_id) 
TABLESPACE "BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX bioentrypath_parent ON bioentry_path(object_bioentry_id);

-- some bioentries will have a sequence
-- biosequence because sequence is sometimes a reserved word

CREATE TABLE biosequence (
        bioentry_id     int  NOT NULL,
        version         SMALLINT, 
        length          int,
        alphabet        VARCHAR(10),
        seq             LONG,
        PRIMARY KEY (bioentry_id)
) TABLESPACE "BIOSQL_DATA";

-- add these only if you want them:
-- ALTER TABLE biosequence ADD COLUMN ( isoelec_pt NUMERIC(4,2) );
-- ALTER TABLE biosequence ADD COLUMN ( mol_wgt DOUBLE PRECISION );
-- ALTER TABLE biosequence ADD COLUMN ( perc_gc DOUBLE PRECISION );

-- database cross-references (e.g., GenBank:AC123456.1)
--
-- Version may be unknown, may be undefined, or may not exist for a certain
-- accession or database (namespace). We require it here to avoid RDBMS-
-- dependend enforcement variants (version is in a compound alternative key),
-- and to simplify query construction for UK look-ups. If there is no version
-- the convention is to put 0 (zero) here. Likewise, a record with a version
-- of zero means the version is to be interpreted as NULL.
--
CREATE TABLE dbxref (
        dbxref_id       int  NOT NULL ,
        dbname          VARCHAR(40) NOT NULL,
        accession       VARCHAR(40) NOT NULL,
        version         SMALLINT  NOT NULL,
        PRIMARY KEY (dbxref_id),
        UNIQUE(accession, dbname, version)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX dbxref_db  ON dbxref(dbname) TABLESPACE "BIOSQL_INDEX";

-- for roundtripping embl/genbank, we need to have the "optional ID"
-- for the dbxref.
--
-- another use of this table could be for storing
-- descriptive text for a dbxref. for example, we may want to
-- know stuff about the interpro accessions we store (without
-- importing all of interpro), so we can attach the text
-- description as a synonym
CREATE TABLE dbxref_qualifier_value (
        dbxref_id               int  NOT NULL,
        term_id                 int  NOT NULL,
        rank                    INT DEFAULT 0 NOT NULL,
        value                   VARCHAR2(100),
        PRIMARY KEY (dbxref_id,term_id,rank)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX dbxrefqual_dbx ON dbxref_qualifier_value(dbxref_id) TABLESPACE 
"BIOSQL_INDEX";
CREATE INDEX dbxrefqual_trm ON dbxref_qualifier_value(term_id) TABLESPACE 
"BIOSQL_INDEX";

-- Direct dblinks. It is tempting to do this
-- from bioentry_id to bioentry_id. But that wont work
-- during updates of one database - we will have to edit
-- this table each time. Better to do the join through accession
-- and db each time. Should be almost as cheap

CREATE TABLE bioentry_dbxref ( 
        bioentry_id        int  NOT NULL,
        dbxref_id          int  NOT NULL,
        rank               SMALLINT,
        PRIMARY KEY (bioentry_id,dbxref_id)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX dblink_dbx  ON bioentry_dbxref(dbxref_id) TABLESPACE 
"BIOSQL_INDEX";

-- We can have multiple references per bioentry, but one reference
-- can also be used for the same bioentry.
--
-- No two references can reference the same reference database entry
-- (dbxref_id). This is where the MEDLINE id goes: PUBMED:123456.

CREATE TABLE reference (
        reference_id       int  NOT NULL ,
        dbxref_id          int ,
        location           VARCHAR2(100) NOT NULL,
        title              VARCHAR2(100),
        authors            VARCHAR2(100) NOT NULL,
        crc                VARCHAR(32),
        PRIMARY KEY (reference_id),
        UNIQUE (dbxref_id),
        UNIQUE (crc)
) TABLESPACE "BIOSQL_DATA";

-- bioentry to reference associations
CREATE TABLE bioentry_reference (
        bioentry_id     int  NOT NULL,
        reference_id    int  NOT NULL,
        start_pos       int,
        end_pos         int,
        rank            SMALLINT DEFAULT 0 NOT NULL,
        PRIMARY KEY(bioentry_id,reference_id,rank)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX bioentryref_ref ON bioentry_reference(reference_id) TABLESPACE 
"BIOSQL_INDEX";


-- We can have multiple comments per seqentry, and
-- comments can have embedded '\n' characters

CREATE TABLE anncomment (
        comment_id      int  NOT NULL ,
        bioentry_id     int  NOT NULL,
        comment_text    VARCHAR2(100) NOT NULL,
        rank            SMALLINT DEFAULT 0 NOT NULL,
        PRIMARY KEY (comment_id),
        UNIQUE(bioentry_id, rank)
) TABLESPACE "BIOSQL_DATA";


-- tag/value and ontology term annotation for bioentries goes here
CREATE TABLE bioentry_qualifier_value (
        bioentry_id             int  NOT NULL,
        term_id                 int  NOT NULL,
        value                   VARCHAR2(100),
        rank                    INT DEFAULT 0 NOT NULL,
        UNIQUE (bioentry_id,term_id,rank)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX bioentryqual_trm ON bioentry_qualifier_value(term_id) TABLESPACE 
"BIOSQL_INDEX";

-- feature table. We cleanly handle
--   - simple locations
--   - split locations
--   - split locations on remote sequences

CREATE TABLE seqfeature (
        seqfeature_id           int  NOT NULL ,
        bioentry_id             int  NOT NULL,
        type_term_id            int  NOT NULL,
        source_term_id          int  NOT NULL,
        display_name            VARCHAR(64),
        rank                    SMALLINT  DEFAULT 0  NOT NULL,
        PRIMARY KEY (seqfeature_id),
        UNIQUE (bioentry_id,type_term_id,source_term_id,rank)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX seqfeature_trm  ON seqfeature(type_term_id) TABLESPACE 
"BIOSQL_INDEX";
CREATE INDEX seqfeature_fsrc ON seqfeature(source_term_id) TABLESPACE 
"BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX seqfeature_bioentryid ON seqfeature(bioentry_id);

-- seqfeatures can be arranged in containment hierarchies.
-- one can imagine storing other relationships between features,
-- in this case the term_id can be used to type the relationship

CREATE TABLE seqfeature_relationship (
        seqfeature_relationship_id int  NOT NULL ,
        object_seqfeature_id    int  NOT NULL,
        subject_seqfeature_id   int  NOT NULL,
        term_id         int  NOT NULL,
        rank                    INT,
        PRIMARY KEY (seqfeature_relationship_id),
        UNIQUE (object_seqfeature_id,subject_seqfeature_id,term_id)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX seqfeaturerel_trm   ON seqfeature_relationship(term_id) TABLESPACE 
"BIOSQL_INDEX";
CREATE INDEX seqfeaturerel_child ON 
seqfeature_relationship(subject_seqfeature_id) TABLESPACE "BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX seqfeaturerel_parent ON 
seqfeature_relationship(object_seqfeature_id);

-- for deep (depth > 1) seqfeature relationship trees we need a transitive
-- closure table too
CREATE TABLE seqfeature_path (
        object_seqfeature_id    int  NOT NULL,
        subject_seqfeature_id   int  NOT NULL,
        term_id                 int  NOT NULL,
        distance                int ,
        UNIQUE (object_seqfeature_id,subject_seqfeature_id,term_id,distance)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX seqfeaturepath_trm   ON seqfeature_path(term_id) TABLESPACE 
"BIOSQL_INDEX";
CREATE INDEX seqfeaturepath_child ON seqfeature_path(subject_seqfeature_id) 
TABLESPACE "BIOSQL_INDEX";
-- you may want to add this for mysql because MySQL often is broken with
-- respect to using the composite index for the initial keys
--CREATE INDEX seqfeaturerel_parent ON seqfeature_path(object_seqfeature_id);

-- tag/value associations - or ontology annotations
CREATE TABLE seqfeature_qualifier_value (
        seqfeature_id           int  NOT NULL,
        term_id                 int  NOT NULL,
        rank                    SMALLINT DEFAULT 0  NOT NULL,
        value                   VARCHAR2(4000) NOT NULL,
        PRIMARY KEY (seqfeature_id,term_id,rank)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX seqfeaturequal_trm ON seqfeature_qualifier_value(term_id) 
TABLESPACE "BIOSQL_INDEX";
   
-- DBXrefs for features. This is necessary for genome oriented viewpoints,
-- where you have a few have long sequences (contigs, or chromosomes) with many
-- features on them. In that case the features are the semantic scope for
-- their annotation bundles, not the bioentry they are attached to.

CREATE TABLE seqfeature_dbxref ( 
        seqfeature_id      int  NOT NULL,
        dbxref_id          int  NOT NULL,
        rank               SMALLINT,
        PRIMARY KEY (seqfeature_id,dbxref_id)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX feadblink_dbx  ON seqfeature_dbxref(dbxref_id) TABLESPACE 
"BIOSQL_INDEX";

-- basically we model everything as potentially having
-- any number of locations, ie, a split location. SimpleLocations
-- just have one location. We need to have a location id for the qualifier
-- associations of fuzzy locations.

-- please do not try to model complex assemblies with this thing. It wont
-- work. Check out the ensembl schema for this.

-- we allow nulls for start/end - this is useful for fuzzies as
-- standard range queries will not be included

-- for remote locations, the join to make is to DBXref
-- the FK to term is a possibility to store the type of the
-- location for determining in one hit whether it's a fuzzy or not

CREATE TABLE location (
        location_id             int  NOT NULL ,
        seqfeature_id           int  NOT NULL,
        dbxref_id               int ,
        term_id                 int ,
        start_pos               int,
        end_pos                 int,
        strand                  INT NOT NULL,
        rank                    SMALLINT DEFAULT 0  NOT NULL,
        PRIMARY KEY (location_id),
        UNIQUE (seqfeature_id, rank)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX seqfeatureloc_start ON location(start_pos, end_pos) TABLESPACE 
"BIOSQL_INDEX";
CREATE INDEX seqfeatureloc_dbx   ON location(dbxref_id) TABLESPACE 
"BIOSQL_INDEX";
CREATE INDEX seqfeatureloc_trm   ON location(term_id) TABLESPACE "BIOSQL_INDEX";

-- location qualifiers - mainly intended for fuzzies but anything
-- can go in here
-- some controlled vocab terms have slots;
-- fuzzies could be modeled as min_start(5), max_start(5)
-- 
-- there is no restriction on extending the fuzzy ontology
-- for your own nefarious aims, although the bio* apis will
-- most likely ignore these
CREATE TABLE location_qualifier_value (
        location_id             int  NOT NULL,
        term_id                 int  NOT NULL,
        value                   VARCHAR(255) NOT NULL,
        int_value               int,
        PRIMARY KEY (location_id,term_id)
) TABLESPACE "BIOSQL_DATA";

CREATE INDEX locationqual_trm ON location_qualifier_value(term_id) TABLESPACE 
"BIOSQL_INDEX";

--
-- Create the foreign key constraints
--

-- ontology term

ALTER TABLE term ADD CONSTRAINT FKont_term
        FOREIGN KEY (ontology_id) REFERENCES ontology(ontology_id)
        ON DELETE CASCADE;

-- term synonyms

ALTER TABLE term_synonym ADD CONSTRAINT FKterm_syn
        FOREIGN KEY (term_id) REFERENCES term(term_id)
        ON DELETE CASCADE;

-- term_dbxref

ALTER TABLE term_dbxref ADD CONSTRAINT FKdbxref_trmdbxref
        FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id)
        ON DELETE CASCADE;
ALTER TABLE term_dbxref ADD CONSTRAINT FKterm_trmdbxref
      FOREIGN KEY (term_id) REFERENCES term(term_id)
        ON DELETE CASCADE;

-- term_relationship

ALTER TABLE term_relationship ADD CONSTRAINT FKtrmsubject_trmrel
        FOREIGN KEY (subject_term_id) REFERENCES term(term_id)
        ON DELETE CASCADE;
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmpredicate_trmrel
        FOREIGN KEY (predicate_term_id) REFERENCES term(term_id)
        ON DELETE CASCADE;
ALTER TABLE term_relationship ADD CONSTRAINT FKtrmobject_trmrel
        FOREIGN KEY (object_term_id) REFERENCES term(term_id)
        ON DELETE CASCADE;
ALTER TABLE term_relationship ADD CONSTRAINT FKterm_trmrel
        FOREIGN KEY (ontology_id) REFERENCES ontology(ontology_id)
        ON DELETE CASCADE;

-- term_path

ALTER TABLE term_path ADD CONSTRAINT FKtrmsubject_trmpath
        FOREIGN KEY (subject_term_id) REFERENCES term(term_id)
        ON DELETE CASCADE;
ALTER TABLE term_path ADD CONSTRAINT FKtrmpredicate_trmpath
        FOREIGN KEY (predicate_term_id) REFERENCES term(term_id)
        ON DELETE CASCADE;
ALTER TABLE term_path ADD CONSTRAINT FKtrmobject_trmpath
        FOREIGN KEY (object_term_id) REFERENCES term(term_id)
        ON DELETE CASCADE;
ALTER TABLE term_path ADD CONSTRAINT FKontology_trmpath
        FOREIGN KEY (ontology_id) REFERENCES ontology(ontology_id)
        ON DELETE CASCADE;

-- taxon, taxon_name

-- unfortunately, we can't constrain parent_taxon_id as it is violated
-- occasionally by the downloads available from NCBI
-- ALTER TABLE taxon ADD CONSTRAINT FKtaxon_taxon
--         FOREIGN KEY (parent_taxon_id) REFERENCES taxon(taxon_id);
ALTER TABLE taxon_name ADD CONSTRAINT FKtaxon_taxonname
        FOREIGN KEY (taxon_id) REFERENCES taxon(taxon_id)
        ON DELETE CASCADE;

-- bioentry

ALTER TABLE bioentry ADD CONSTRAINT FKtaxon_bioentry
        FOREIGN KEY (taxon_id) REFERENCES taxon(taxon_id);
ALTER TABLE bioentry ADD CONSTRAINT FKbiodatabase_bioentry
        FOREIGN KEY (biodatabase_id) REFERENCES biodatabase(biodatabase_id);

-- bioentry_relationship

ALTER TABLE bioentry_relationship ADD CONSTRAINT FKterm_bioentryrel
        FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKparentent_bioentryrel
        FOREIGN KEY (object_bioentry_id) REFERENCES bioentry(bioentry_id)
        ON DELETE CASCADE;
ALTER TABLE bioentry_relationship ADD CONSTRAINT FKchildent_bioentryrel
        FOREIGN KEY (subject_bioentry_id) REFERENCES bioentry(bioentry_id)
        ON DELETE CASCADE;

-- bioentry_path

ALTER TABLE bioentry_path ADD CONSTRAINT FKterm_bioentrypath
        FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE bioentry_path ADD CONSTRAINT FKparentent_bioentrypath
        FOREIGN KEY (object_bioentry_id) REFERENCES bioentry(bioentry_id)
        ON DELETE CASCADE;
ALTER TABLE bioentry_path ADD CONSTRAINT FKchildent_bioentrypath
        FOREIGN KEY (subject_bioentry_id) REFERENCES bioentry(bioentry_id)
        ON DELETE CASCADE;

-- biosequence

ALTER TABLE biosequence ADD CONSTRAINT FKbioentry_bioseq
        FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
        ON DELETE CASCADE;

-- comment

ALTER TABLE anncomment ADD CONSTRAINT FKbioentry_comment
        FOREIGN KEY(bioentry_id) REFERENCES bioentry(bioentry_id)
        ON DELETE CASCADE;

-- bioentry_dbxref

ALTER TABLE bioentry_dbxref ADD CONSTRAINT FKbioentry_dblink
        FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
        ON DELETE CASCADE;
ALTER TABLE bioentry_dbxref ADD CONSTRAINT FKdbxref_dblink
        FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id)
        ON DELETE CASCADE;

-- dbxref_qualifier_value

ALTER TABLE dbxref_qualifier_value ADD CONSTRAINT FKtrm_dbxrefqual
        FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE dbxref_qualifier_value ADD CONSTRAINT FKdbxref_dbxrefqual
        FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id)
        ON DELETE CASCADE;

-- bioentry_reference

ALTER TABLE bioentry_reference ADD CONSTRAINT FKbioentry_entryref
        FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
        ON DELETE CASCADE;
ALTER TABLE bioentry_reference ADD CONSTRAINT FKreference_entryref
        FOREIGN KEY (reference_id) REFERENCES reference(reference_id)
        ON DELETE CASCADE;

-- bioentry_qualifier_value

ALTER TABLE bioentry_qualifier_value ADD CONSTRAINT FKbioentry_entqual
        FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
        ON DELETE CASCADE;
ALTER TABLE bioentry_qualifier_value ADD CONSTRAINT FKterm_entqual
        FOREIGN KEY (term_id) REFERENCES term(term_id);

-- reference 
ALTER TABLE reference ADD CONSTRAINT FKdbxref_reference
      FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id ) ;

-- seqfeature

ALTER TABLE seqfeature ADD CONSTRAINT FKterm_seqfeature
        FOREIGN KEY (type_term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature ADD CONSTRAINT FKsourceterm_seqfeature
        FOREIGN KEY (source_term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature ADD CONSTRAINT FKbioentry_seqfeature
        FOREIGN KEY (bioentry_id) REFERENCES bioentry(bioentry_id)
        ON DELETE CASCADE;

-- seqfeature_relationship

ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKterm_seqfeatrel
        FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKparentfeat_seqfeatrel
        FOREIGN KEY (object_seqfeature_id) REFERENCES seqfeature(seqfeature_id)
        ON DELETE CASCADE;
ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKchildfeat_seqfeatrel
        FOREIGN KEY (subject_seqfeature_id) REFERENCES seqfeature(seqfeature_id)
        ON DELETE CASCADE;

-- seqfeature_path

ALTER TABLE seqfeature_path ADD CONSTRAINT FKterm_seqfeatpath
        FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature_path ADD CONSTRAINT FKparentfeat_seqfeatpath
        FOREIGN KEY (object_seqfeature_id) REFERENCES seqfeature(seqfeature_id)
        ON DELETE CASCADE;
ALTER TABLE seqfeature_path ADD CONSTRAINT FKchildfeat_seqfeatpath
        FOREIGN KEY (subject_seqfeature_id) REFERENCES seqfeature(seqfeature_id)
        ON DELETE CASCADE;

-- seqfeature_qualifier_value
ALTER TABLE seqfeature_qualifier_value ADD CONSTRAINT FKterm_featqual
        FOREIGN KEY (term_id) REFERENCES term(term_id);
ALTER TABLE seqfeature_qualifier_value ADD CONSTRAINT FKseqfeature_featqual
        FOREIGN KEY (seqfeature_id) REFERENCES seqfeature(seqfeature_id)
        ON DELETE CASCADE;

-- seqfeature_dbxref

ALTER TABLE seqfeature_dbxref ADD CONSTRAINT FKseqfeature_feadblink
        FOREIGN KEY (seqfeature_id) REFERENCES seqfeature(seqfeature_id)
        ON DELETE CASCADE;
ALTER TABLE seqfeature_dbxref ADD CONSTRAINT FKdbxref_feadblink
        FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id)
        ON DELETE CASCADE;

-- location

ALTER TABLE location ADD CONSTRAINT FKseqfeature_location
        FOREIGN KEY (seqfeature_id) REFERENCES seqfeature(seqfeature_id)
        ON DELETE CASCADE;
ALTER TABLE location ADD CONSTRAINT FKdbxref_location
        FOREIGN KEY (dbxref_id) REFERENCES dbxref(dbxref_id);
ALTER TABLE location ADD CONSTRAINT FKterm_featloc
        FOREIGN KEY (term_id) REFERENCES term(term_id);

-- location_qualifier_value

ALTER TABLE location_qualifier_value ADD CONSTRAINT FKfeatloc_locqual
        FOREIGN KEY (location_id) REFERENCES location(location_id)
        ON DELETE CASCADE;
ALTER TABLE location_qualifier_value ADD CONSTRAINT FKterm_locqual
        FOREIGN KEY (term_id) REFERENCES term(term_id);


--
-- Triggers for automatic primary key generation and other sanity checks
--

CREATE OR REPLACE TRIGGER BID_location
  BEFORE INSERT
  on location
  -- 
  for each row
BEGIN
IF :new.location_id IS NULL THEN
    SELECT location_pk_seq.nextval INTO :new.location_id FROM DUAL;
END IF;
END;
/


CREATE OR REPLACE TRIGGER BID_seqfeature
  BEFORE INSERT
  on seqfeature
  -- 
  for each row
BEGIN
IF :new.seqfeature_id IS NULL THEN
    SELECT seqfeature_pk_seq.nextval INTO :new.seqfeature_id FROM DUAL;
END IF;
END;
/


CREATE TRIGGER BID_seqfeature_relationship
  BEFORE INSERT
  on seqfeature_relationship
  -- 
  for each row
BEGIN
IF :new.seqfeature_relationship_id IS NULL THEN
    SELECT seqfeature_relationship_pk_seq.nextval INTO 
:new.seqfeature_relationship_id FROM DUAL;
END IF;
END;
/


CREATE OR REPLACE TRIGGER BID_anncomment
  BEFORE INSERT
  on anncomment
  -- 
  for each row
BEGIN
IF :new.comment_id IS NULL THEN
    SELECT anncomment_pk_seq.nextval INTO :new.comment_id FROM DUAL;
END IF;
END;
/


CREATE OR REPLACE TRIGGER BID_reference
  BEFORE INSERT
  on reference
  -- 
  for each row
BEGIN
IF :new.reference_id IS NULL THEN
    SELECT reference_pk_seq.nextval INTO :new.reference_id FROM DUAL;
END IF;
END;
/


CREATE OR REPLACE TRIGGER BID_bioentry_relationship
  BEFORE INSERT
  on bioentry_relationship
  -- 
  for each row
BEGIN
IF :new.bioentry_relationship_id IS NULL THEN
    SELECT bioentry_relationship_pk_seq.nextval INTO 
:new.bioentry_relationship_id FROM DUAL;
END IF;
END;
/


CREATE OR REPLACE TRIGGER BID_bioentry
  BEFORE INSERT
  on bioentry
  -- 
  for each row
BEGIN
IF :new.bioentry_id IS NULL THEN
    SELECT bioentry_pk_seq.nextval INTO :new.bioentry_id FROM DUAL;
END IF;
-- IF :new.Division IS NULL THEN
--    :new.Division := 'UNK';
-- END IF;
END;
/


CREATE OR REPLACE TRIGGER BID_term
  BEFORE INSERT
  on term
  -- 
  for each row
BEGIN
IF :new.term_id IS NULL THEN
    SELECT term_pk_seq.nextval INTO :new.term_id FROM DUAL;
END IF;
END;
/


CREATE OR REPLACE TRIGGER BID_term_relationship
  BEFORE INSERT
  on term_relationship
  -- 
  for each row
BEGIN
IF :new.term_relationship_id IS NULL THEN
    SELECT term_relationship_pk_seq.nextval INTO :new.term_relationship_id FROM 
DUAL;
END IF;
END;
/


CREATE OR REPLACE TRIGGER BID_term_path
  BEFORE INSERT
  on term_path
  -- 
  for each row
BEGIN
IF :new.term_path_id IS NULL THEN
    SELECT term_path_pk_seq.nextval INTO :new.term_path_id FROM DUAL;
END IF;
END;
/

CREATE OR REPLACE TRIGGER BID_ontology
  BEFORE INSERT
  on ontology
  -- 
  for each row
BEGIN
IF :new.ontology_id IS NULL THEN
    SELECT ontology_pk_seq.nextval INTO :new.ontology_id FROM DUAL;
END IF;
END;
/


CREATE OR REPLACE TRIGGER BID_taxon
  BEFORE INSERT
  on taxon
  -- 
  for each row
BEGIN
IF :new.taxon_id IS NULL THEN
    SELECT taxon_pk_seq.nextval INTO :new.taxon_id FROM DUAL;
END IF;
END;
/


CREATE OR REPLACE TRIGGER BID_biodatabase
  BEFORE INSERT
  on biodatabase
  -- 
  for each row
BEGIN
IF :new.biodatabase_id IS NULL THEN
    SELECT biodatabase_pk_seq.nextval INTO :new.biodatabase_id FROM DUAL;
END IF;
END;
/


CREATE OR REPLACE TRIGGER BID_dbxref
  BEFORE INSERT
  on dbxref
  -- 
  for each row
BEGIN
IF :new.dbxref_id IS NULL THEN
    SELECT dbxref_pk_seq.nextval INTO :new.dbxref_id FROM DUAL;
END IF;
END;
/

_______________________________________________
Biojava-l mailing list  -  [EMAIL PROTECTED]
http://biojava.org/mailman/listinfo/biojava-l

Re: [Biojava-l] BioSQL

Reply via email to