Author: moeller Date: 2014-04-09 10:46:09 +0000 (Wed, 09 Apr 2014) New Revision: 16628
Removed: trunk/community/infrastructure/getData/getData.conf.d/human.getData.mk trunk/community/infrastructure/getData/getData.conf.d/mouse.getData.mk Modified: trunk/community/infrastructure/getData/getData.conf.d/Ensembl_genome.mk trunk/community/infrastructure/getData/getData.conf.d/human.getData trunk/community/infrastructure/getData/getData.conf.d/mouse.getData Log: Updated and extended download and processing of genomes Human and mouse only - as a pilot. Modified: trunk/community/infrastructure/getData/getData.conf.d/Ensembl_genome.mk =================================================================== --- trunk/community/infrastructure/getData/getData.conf.d/Ensembl_genome.mk 2014-04-09 10:45:07 UTC (rev 16627) +++ trunk/community/infrastructure/getData/getData.conf.d/Ensembl_genome.mk 2014-04-09 10:46:09 UTC (rev 16628) @@ -1,9 +1,21 @@ SHARED_WGET_OPTIONS=$(shell getData --getWgetOptions) -MIRROR = ftp://ftp.ensembl.org/pub/current_fasta +ENSEMBLVERSION=75 +MIRROR = ftp://ftp.ensembl.org/pub/release-$(ENSEMBLVERSION)/fasta get: - wget $(SHARED_WGET_OPTIONS) $(MIRROR)/$(ORGANISM_L)/dna/$(ORGANISM).$(BUILD).dna.chromosome.*.fa.gz + echo "I: Retrieving data for Ensembl version $(ENSEMBLVERSION) species $(ORGANISM_L)" + wget $(SHARED_WGET_OPTIONS) $(MIRROR)/$(ORGANISM_L)/dna/$(ORGANISM).*.$(ENSEMBLVERSION).dna.chromosome.*.fa.gz unpack: + find . 
-maxdepth 1 -name "*.fa" -delete for file in *chromosome.*.fa.gz ; do zcat $$file > `basename $$file .gz` ; done + +blast: + if [ -x /usr/bin/makeblastdb ]; then \ + echo "I: Found BLAST+ (preferred) for indexing"; \ + cat *fa | makeblastdb -title $(NICKNAME) -dbtype nucl -out $(NICKNAME); \ + elif [ -x /usr/bin/formatdb ]; then \ + echo "I: Found legacy BLAST for indexing"; \ + cat *fa | formatdb -i /dev/stdin -t $(NICKNAME) -n $(NICKNAME) -p F ; \ + fi Modified: trunk/community/infrastructure/getData/getData.conf.d/human.getData =================================================================== --- trunk/community/infrastructure/getData/getData.conf.d/human.getData 2014-04-09 10:45:07 UTC (rev 16627) +++ trunk/community/infrastructure/getData/getData.conf.d/human.getData 2014-04-09 10:46:09 UTC (rev 16628) @@ -1,10 +1,24 @@ print STDERR "Reading Homo sapiens configuration file\n" if $verbose; -$toBeMirrored{"human.genome"}={ - "name" => "hg19 – Genome Reference Consortium", +$toBeMirrored{"human.hg18.ncbi36.genome"}={ + "name" => "hg18/NCBI36 – Genome Reference Consortium from Ensembl", "tags" => ["human","genome"], - "source" => "make -f /etc/getData.conf.d/human.getData.mk get unpack", -# "post-download" => "make blast" + "source" => "make ORGANISM=Homo_sapiens ORGANISM_L=homo_sapiens ENSEMBLVERSION=54 NICKNAME=hg18 -f /etc/getData.conf.d/Ensembl_genome.mk get unpack", + + "post-download" => "make -f NICKNAME=hg18 -f /etc/getData.conf.d/Ensembl_genome.mk blast", + "depends" => "make", + "recommends" => "ncbi-blast+", + "size" => "39G" }; +$toBeMirrored{"human.hg19.grch37.genome"}={ + "name" => "hg19/GRCh37 – Genome Reference Consortium from Ensembl", + "tags" => ["human","genome"], + "source" => "make ORGANISM=Homo_sapiens ORGANISM_L=homo_sapiens ENSEMBLVERSION=75 NICKNAME=hg19 -f /etc/getData.conf.d/Ensembl_genome.mk get unpack", + "post-download" => "make -f NICKNAME=hg19 -f /etc/getData.conf.d/Ensembl_genome.mk blast", + "depends" => "make", + "recommends" 
=> "ncbi-blast+", + "size" => "39G" +}; + 1; Deleted: trunk/community/infrastructure/getData/getData.conf.d/human.getData.mk =================================================================== --- trunk/community/infrastructure/getData/getData.conf.d/human.getData.mk 2014-04-09 10:45:07 UTC (rev 16627) +++ trunk/community/infrastructure/getData/getData.conf.d/human.getData.mk 2014-04-09 10:46:09 UTC (rev 16628) @@ -1,6 +0,0 @@ -ORGANISM = Homo_sapiens -ORGANISM_L = homo_sapiens -BUILD = GRCh37.56 -NICKNAME = hg19 - -include /etc/getData.conf.d/Ensembl_genome.mk Modified: trunk/community/infrastructure/getData/getData.conf.d/mouse.getData =================================================================== --- trunk/community/infrastructure/getData/getData.conf.d/mouse.getData 2014-04-09 10:45:07 UTC (rev 16627) +++ trunk/community/infrastructure/getData/getData.conf.d/mouse.getData 2014-04-09 10:46:09 UTC (rev 16628) @@ -1,10 +1,17 @@ print STDERR "Reading Mus musculus configuration file\n" if $verbose; -$toBeMirrored{"mouse.genome"}={ - "name" => "mm9 – Mouse Genome Sequencing Consortium", +$toBeMirrored{"mouse.mm9.ncbim37.genome"}={ + "name" => "mm9 – NCBIM37 Mouse Genome Sequencing Consortium from Ensembl", "tags" => ["mouse","genome"], - "source" => "make -f /etc/getData.conf.d/mouse.getData.mk get unpack", -# "post-download" => "make blast" + "source" => "make ENSEMBLVERSION=67 ORGANISM=Mus_musculus ORGANISM_L=mus_musculus NICKNAME=mm9 -f /etc/getData.conf.d/Ensembl_genome.mk get unpack", + "post-download" => "make NICKNAME=mm9 -f /etc/getData.conf.d/Ensembl_genome.mk blast" }; +$toBeMirrored{"mouse.mm10.grcm38.genome"}={ + "name" => "mm10 – GRCm38 Mouse Genome Sequencing Consortium from Ensembl", + "tags" => ["mouse","genome"], + "source" => "make ENSEMBLVERSION=75 ORGANISM=Mus_musculus ORGANISM_L=mus_musculus NICKNAME=mm10 -f /etc/getData.conf.d/Ensembl_genome.mk get unpack", + "post-download" => "make NICKNAME=mm10 -f /etc/getData.conf.d/Ensembl_genome.mk 
blast" +}; + 1; Deleted: trunk/community/infrastructure/getData/getData.conf.d/mouse.getData.mk =================================================================== --- trunk/community/infrastructure/getData/getData.conf.d/mouse.getData.mk 2014-04-09 10:45:07 UTC (rev 16627) +++ trunk/community/infrastructure/getData/getData.conf.d/mouse.getData.mk 2014-04-09 10:46:09 UTC (rev 16628) @@ -1,6 +0,0 @@ -ORGANISM = Mus_musculus -ORGANISM_L = mus_musculus -BUILD = NCBIM37.55 -NICKNAME = mm9 - -include /etc/getData.conf.d/Ensembl_genome.mk _______________________________________________ debian-med-commit mailing list debian-med-commit@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
