This is an automated email from the git hooks/post-receive script. afif pushed a commit to branch master in repository ensembl-test.
commit 26e1f713ff409bfccf7adda8d071e9dafa59477f Author: Afif Elghraoui <[email protected]> Date: Tue May 17 01:13:21 2016 -0700 Imported Upstream version 84+20160225+0928 --- LICENSE | 201 ++++++ cpanfile | 11 + modules/Bio/EnsEMBL/Test/CLEAN.pl | 37 ++ modules/Bio/EnsEMBL/Test/DumpDatabase.pm | 251 ++++++++ modules/Bio/EnsEMBL/Test/FTPD.pm | 104 ++++ modules/Bio/EnsEMBL/Test/MultiTestDB.pm | 807 +++++++++++++++++++++++++ modules/Bio/EnsEMBL/Test/MultiTestDB/SQLite.pm | 147 +++++ modules/Bio/EnsEMBL/Test/MultiTestDB/mysql.pm | 106 ++++ modules/Bio/EnsEMBL/Test/RunPipeline.pm | 444 ++++++++++++++ modules/Bio/EnsEMBL/Test/StaticHTTPD.pm | 109 ++++ modules/Bio/EnsEMBL/Test/TestUtils.pm | 635 +++++++++++++++++++ scripts/MultiTestDB.conf.example | 26 + scripts/README | 66 ++ scripts/README.dump_test_schema | 11 + scripts/cleanup_databases.pl | 87 +++ scripts/clone_core_database.pl | 552 +++++++++++++++++ scripts/convert_test_schemas.sh | 62 ++ scripts/dump_test_schema.pl | 198 ++++++ scripts/harness.sh | 71 +++ scripts/load_database.pl | 134 ++++ scripts/patch_test_databases.pl | 264 ++++++++ scripts/runtests.pl | 195 ++++++ 22 files changed, 4518 insertions(+) diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..150e5dc --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/cpanfile b/cpanfile new file mode 100644 index 0000000..c6a99c4 --- /dev/null +++ b/cpanfile @@ -0,0 +1,11 @@ +requires 'DBI'; +requires 'DBD::mysql'; +requires 'Test::More'; +requires 'Devel::Peek'; +requires 'Devel::Cycle'; +requires 'Error'; +requires 'PadWalker'; +requires 'Test::Builder::Module'; +requires 'IO::String'; +requires 'IO::Scalar'; +requires 'Test::FTP::Server'; diff --git a/modules/Bio/EnsEMBL/Test/CLEAN.pl b/modules/Bio/EnsEMBL/Test/CLEAN.pl new file mode 100644 index 0000000..d294c62 --- /dev/null +++ b/modules/Bio/EnsEMBL/Test/CLEAN.pl @@ -0,0 +1,37 @@ +# Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +use strict; +use warnings; + +use File::Basename; +use File::Spec; +use Test::More; + +use Bio::EnsEMBL::Test::MultiTestDB; + +diag 'Starting database and files cleaning up...'; + +my $curr_file = __FILE__; +my $db_conf = Bio::EnsEMBL::Test::MultiTestDB->get_db_conf(dirname(__FILE__)); + +foreach my $species ( keys %{ $db_conf->{'databases'} } ) { + my $multi = Bio::EnsEMBL::Test::MultiTestDB->new($species); +} + +note "Deleting $curr_file"; +my $result = unlink $curr_file; +ok($result, 'Unlink of '.$curr_file.' worked'); + +done_testing(); \ No newline at end of file diff --git a/modules/Bio/EnsEMBL/Test/DumpDatabase.pm b/modules/Bio/EnsEMBL/Test/DumpDatabase.pm new file mode 100644 index 0000000..32ddebe --- /dev/null +++ b/modules/Bio/EnsEMBL/Test/DumpDatabase.pm @@ -0,0 +1,251 @@ +=head1 LICENSE + +Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=cut + +package Bio::EnsEMBL::Test::DumpDatabase; + +use strict; +use warnings; + +use Bio::EnsEMBL::Utils::IO qw/work_with_file/; +use Bio::EnsEMBL::Utils::Scalar qw/assert_ref/; +use File::Spec; +use File::Path qw/mkpath/; +use Scalar::Util qw/looks_like_number/; + +sub new { + my ($class, $dba, $base_directory, $old_schema_details, $new_schema_details) = @_; + my $self = bless({}, (ref($class) || $class)); + die "No DBA given" unless $dba; + die "No directory given" unless $base_directory; + + $self->dba($dba); + $self->base_directory($base_directory); + $self->old_schema_details($old_schema_details); + $self->new_schema_details($new_schema_details); + return $self; +} + +sub dump { + my ($self) = @_; + $self->dump_sql(); + $self->dump_tables(); + $self->delete_tables(); + return; +} + +sub dba { + my ($self, $dba) = @_; + if(defined $dba) { + assert_ref($dba, 'Bio::EnsEMBL::DBSQL::DBAdaptor', 'source DBAdaptor'); + $self->{'dba'} = $dba; + } + return $self->{'dba'}; +} + +sub base_directory { + my ($self, $base_directory) = @_; + if(defined $base_directory) { + die "Cannot find the directory $base_directory" if ! -d $base_directory; + $self->{'base_directory'} = $base_directory; + } + return $self->{'base_directory'}; +} + +sub old_schema_details { + my ($self, $old_schema_details) = @_; + $self->{'old_schema_details'} = $old_schema_details if defined $old_schema_details; + return $self->{'old_schema_details'}; +} + +sub new_schema_details { + my ($self, $new_schema_details) = @_; + $self->{'new_schema_details'} = $new_schema_details if defined $new_schema_details; + return $self->{'new_schema_details'}; +} + +sub directory { + my ($self) = @_; + my $dir = File::Spec->catdir($self->base_directory(), $self->production_name(), $self->group()); + if(! -d $dir) { + mkpath $dir; + } + return $dir; +} + +sub production_name { + my ($self) = @_; + eval { + my $mc = $self->dba->get_MetaContainer(); + if($mc->can('get_production_name')) { + return $mc->get_production_name(); + } + }; + return $self->dba->species; +} + +sub group { + my ($self) = @_; + return $self->dba->group; +} + +sub dump_sql { + my ($self) = @_; + my $file = File::Spec->catfile($self->directory(), 'table.sql'); + my $h = $self->dba->dbc->sql_helper(); + + my @real_tables = @{$self->_tables()}; + my @views = @{$self->_views()}; + + my $schema_differences = $self->_schema_differences(); + #Do not redump if there were no schema changes (could be just a data patch) + return if ! $schema_differences; + + work_with_file($file, 'w', sub { + my ($fh) = @_; + foreach my $table (@real_tables) { + my $sql = $h->execute_single_result(-SQL => "show create table ${table}", -CALLBACK => sub { return $_[0]->[1] }); + print $fh "$sql;\n\n"; + } + foreach my $view (@views) { + my $sql = $h->execute_single_result(-SQL => "show create view ${view}", -CALLBACK => sub { return $_[0]->[1] }); + print $fh "$sql;\n\n"; + } + return; + }); + + return; +} + +sub dump_tables { + my ($self) = @_; + my $tables = $self->_tables(); + foreach my $table (@{$tables}) { + my $data_difference = $self->_data_differences($table); + #Skip this iteration of the loop if there were no data differences + next if ! $data_difference; + $self->dump_table($table); + } + return; +} + +sub dump_table { + my ($self, $table) = @_; + my $response = $self->dba->dbc->sql_helper()->execute_simple( + -SQL => "select count(*) from $table"); + return if ($response->[0] == 0); + my $file = File::Spec->catfile($self->directory(), $table.'.txt'); + work_with_file($file, 'w', sub { + my ($fh) = @_; + $self->dba->dbc->sql_helper()->execute_no_return( + -SQL => "select * from $table", + -CALLBACK => sub { + my ($row) = @_; + my @copy; + foreach my $e (@{$row}) { + if(!defined $e) { + $e = '\N'; + } + elsif(!looks_like_number($e)) { + $e =~ s/\n/\\\n/g; + $e =~ s/\t/\\\t/g; + } + push(@copy, $e); + } + my $line = join(qq{\t}, @copy); + print $fh $line, "\n"; + } + ); + }); + return; +} + +sub delete_tables { + my ($self) = @_; + my $old_schema_details = $self->old_schema_details(); + my $new_schema_details = $self->new_schema_details(); + return unless $old_schema_details && $new_schema_details; + foreach my $old_table (keys %{$old_schema_details}) { + if(! exists $new_schema_details->{$old_table}) { + my $file = File::Spec->catfile($self->directory(), $old_table.'.txt'); + unlink $file or die "Cannot unlink the file '$file': $!"; + } + } + return; +} + +sub _tables { + my ($self) = @_; + my $lookup = $self->_table_lookup(); + return [sort grep { $lookup->{$_} ne 'VIEW' } keys %$lookup ]; +} + +sub _views { + my ($self) = @_; + my $lookup = $self->_table_lookup(); + return [sort grep { $lookup->{$_} eq 'VIEW' } keys %$lookup]; +} + +sub _table_lookup { + my ($self) = @_; + if(! $self->{_table_lookup}) { + my $h = $self->dba->dbc->sql_helper(); + my $lookup = $h->execute_into_hash(-SQL => 'select TABLE_NAME, TABLE_TYPE from information_schema.TABLES where TABLE_SCHEMA = DATABASE()'); + $self->{_table_lookup} = $lookup; + } + return $self->{_table_lookup}; +} + +sub _schema_differences { + my ($self) = @_; + my $old_schema_details = $self->old_schema_details(); + my $new_schema_details = $self->new_schema_details(); + + #Assume there is a difference if none or 1 hash was provided + return 1 unless $old_schema_details && $new_schema_details; + + my $old_schema_concat = join(qq{\n}, map { $old_schema_details->{$_}->{create} } sort keys %$old_schema_details); + my $new_schema_concat = join(qq{\n}, map { $new_schema_details->{$_}->{create} || '' } sort keys %$old_schema_details); + + return ( $old_schema_concat ne $new_schema_concat ) ? 1 : 0; +} + +sub _data_differences { + my ($self, $table) = @_; + my $old_schema_details = $self->old_schema_details(); + my $new_schema_details = $self->new_schema_details(); + + #Assume there is a difference if none or 1 hash was provided + return 1 unless $old_schema_details && $new_schema_details; + return 1 if ! exists $old_schema_details->{$table}; + return 1 if ! exists $new_schema_details->{$table}; + return ( $old_schema_details->{$table}->{checksum} ne $new_schema_details->{$table}->{checksum}) ? 1 : 0; +} + +sub _delete_table_file { + my ($self, $table) = @_; + + return; +} + +sub DESTROY { + my ($self) = @_; + $self->dba->dbc->disconnect_if_idle(); + return; +} + +1; \ No newline at end of file diff --git a/modules/Bio/EnsEMBL/Test/FTPD.pm b/modules/Bio/EnsEMBL/Test/FTPD.pm new file mode 100644 index 0000000..bfdd36f --- /dev/null +++ b/modules/Bio/EnsEMBL/Test/FTPD.pm @@ -0,0 +1,104 @@ +=head1 LICENSE + +Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=cut + +package Bio::EnsEMBL::Test::FTPD; + +=pod + +=head1 NAME + +Bio::EnsEMBL::Test::FTPD; + +=head1 SYNOPSIS + + my $root_dir = '/path/to/static/files'; + my $user = 'testuser'; + my $pass = 'testpass'; + my $ftpd = Bio::EnsEMBL::Test::FTPD->new($user, $pass, $root_dir); + + my $ftp_uri = "ftp://$user:$pass\@localhost:" . $ftpd->port . '/myfiletoretreive.txt'; + ok(do_FTP($ftp_uri), 'Basic successful get'); + +=head1 DESCRIPTION + +This module creates a simple FTP daemon with a root directory and credentials +given at instantiation. It uses Net::FTPServer internally so all basic FTP +functionality is available. + +If the root directory doesn't exist an error will be raised. + +The FTP daemon is destroyed on exit. + +=cut + +use strict; +use warnings; + +use Test::More; +use Test::Exception; +use Test::TCP; +require_ok('Test::FTP::Server'); + +use base 'Test::Builder::Module'; + +=head2 new + + Arg[1] : string $user + Username for ftp server authentication + Arg[2] : string $pass + Password for ftp server authentication + Arg[1] : string $root_dir + The directory where files to be returned by + the FTPD live + + Returntype : Test::TCP instance, where listening + port can be retreived + +=cut + +sub new { + my ($self, $user, $pass, $root_dir) = @_; + + # Do we have a valid DocumentRoot + ok( -d $root_dir, 'Root dir for HTTPD is valid'); + + # Create an FTPD wrapped in a Test::TCP + # instance, Test::TCP finds an unused port + # for the FTPD to bind to + my $ftpd = Test::TCP->new( + code => sub { + my $port = shift; + + my $ftpd = Test::FTP::Server->new( + 'users' => [{ + 'user' => $user, + 'pass' => $pass, + 'root' => $root_dir, + }], + 'ftpd_conf' => { + 'port' => $port, + 'daemon mode' => 1, + 'run in background' => 0, + }, + )->run; + }); + + return $ftpd; +} + +1; diff --git a/modules/Bio/EnsEMBL/Test/MultiTestDB.pm b/modules/Bio/EnsEMBL/Test/MultiTestDB.pm new file mode 100644 index 0000000..9d8ca6e --- /dev/null +++ b/modules/Bio/EnsEMBL/Test/MultiTestDB.pm @@ -0,0 +1,807 @@ +=head1 LICENSE + +Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=cut + +package Bio::EnsEMBL::Test::MultiTestDB; + +=pod + +=head1 NAME + +Bio::EnsEMBL::Test::MultiTestDB + +=head1 SYNOPSIS + + my $test = Bio::EnsEMBL::Test::MultiTestDB->new(); #uses homo_sapiens by default + my $dba = $test->get_DBAdaptor(); #uses core by default + + my $dros = Bio::EnsEMBL::Test::MultiTestDB->new('drosophila_melanogaster'); + my $dros_rnaseq_dba = $dros->get_DBAdaptor('rnaseq'); + +=head1 DESCRIPTION + +This module automatically builds the specified database on demand and provides +a number of methods for saving, restoring and hiding databases tables in +that database. + +If the environment variable C<RUNTESTS_HARNESS> is set then this code will +not attempt a cleanup of the database when the object is destroyed. When used +in conjunction with C<runtests.pl> this means we create 1 database and reuse +it for all tests at the expense of test isolation. Your tests should leave the +database in a consistent state for the next test case and never assume +perfect isolation. + +You can also use the env variable C<RUNTESTS_HARNESS_NORESTORE> which avoids +the running of restore() when C<RUNTESTS_HARNESS> is active. B<ONLY> use this +when you are going to destory a MultiTestDB but DBs should not be cleaned up +or restored e.g. threads. See dbEntries.t for an example of how to use it. + +=cut + +use strict; +use warnings; + +use DBI; +use Data::Dumper; +use English qw(-no_match_vars); +use File::Basename; +use File::Copy; +use File::Spec::Functions; +use IO::File; +use IO::Dir; +use POSIX qw(strftime); + +use Bio::EnsEMBL::Utils::IO qw/slurp work_with_file/; +use Bio::EnsEMBL::Utils::Exception qw( warning throw ); + +use base 'Test::Builder::Module'; + +$OUTPUT_AUTOFLUSH = 1; + +sub diag { + my ($self, @args) = @_; + $self->builder()->diag(@args); + return; +} + +sub note { + my ($self, @args) = @_; + $self->builder()->note(@args); + return; +} + +use constant { + # Homo sapiens is used if no species is specified + DEFAULT_SPECIES => 'homo_sapiens', + + # Configuration file extension appended onto species name + FROZEN_CONF_SUFFIX => 'MultiTestDB.frozen.conf', + + CONF_FILE => 'MultiTestDB.conf', + DEFAULT_CONF_FILE => 'MultiTestDB.conf.default', + DUMP_DIR => 'test-genome-DBs', + ALTERNATIVE_DUMP_DIR => 'test-Genome-DBs', +}; + +sub get_db_conf { + my ($class, $current_directory) = @_; + # Create database from local config file + my $conf_file = catfile( $current_directory, CONF_FILE ); + my $db_conf = $class->_eval_file($conf_file); + die "Error while loading config file" if ! defined $db_conf; + + #Get the default if defined + my $default_conf_file = catfile( $current_directory, DEFAULT_CONF_FILE ); + my $default_db_conf; + if(-f $default_conf_file) { + $default_db_conf = $class->_eval_file($default_conf_file); + } + else { + my $tmpl = 'Cannot find the default config file at "%s"; if things do not work then this might be why'; + $class->note(sprintf($tmpl, $default_conf_file)); + $default_db_conf = {}; + } + + my %merged = ( + %{$default_db_conf}, + %{$db_conf}, + ); + + return \%merged; +} + +sub base_dump_dir { + my ($class, $current_directory) = @_; + my $dir = catdir( $current_directory, DUMP_DIR); + if(! -d $dir) { + my $alternative_dir = catdir($current_directory, ALTERNATIVE_DUMP_DIR); + if(-d $alternative_dir) { + $dir = $alternative_dir; + } + } + return $dir; +} + +sub new { + my ($class, $species, $user_submitted_curr_dir, $skip_database_loading) = @_; + + my $self = bless {}, $class; + + #If told the current directory where config lives then use it + if($user_submitted_curr_dir) { + $self->curr_dir($user_submitted_curr_dir); + } + else { + # Go and grab the current directory and store it away + my ( $package, $file, $line ) = caller; + my $curr_dir = ( File::Spec->splitpath($file) )[1]; + if (!defined($curr_dir) || $curr_dir eq q{}) { + $curr_dir = curdir(); + } + else { + $curr_dir = File::Spec->rel2abs($curr_dir); + } + $self->curr_dir($curr_dir); + } + $self->_rebless; + + if($ENV{'RUNTESTS_HARNESS'}) { + my $target_file = catfile($self->curr_dir() , 'CLEAN.t'); + if (! -e $target_file) { + my $clean_file = catfile( ( File::Spec->splitpath(__FILE__) )[1], 'CLEAN.pl' ); + copy($clean_file, $target_file ) or warning("# !! Could not copy $clean_file to $target_file\n"); + } + } + + $species ||= DEFAULT_SPECIES; + $self->species($species); + + if ( -e $self->get_frozen_config_file_path() ) { + $self->load_config(); + } + else { + if(!$skip_database_loading) { + # Load the databases and generate the conf hash + $self->load_databases(); + # Freeze configuration in a file + $self->store_config(); + } + else { + $self->{conf} = {}; + } + } + + # Generate the db_adaptors from the $self->{'conf'} hash + if(!$skip_database_loading) { + $self->create_adaptors(); + } + + return $self; +} + +# +# Rebless based on driver +# +sub _rebless { + my ($self) = @_; + my $driver = $self->db_conf->{driver}; + my $new_class = ref($self) . '::' . $driver; + eval "require $new_class"; + if ($EVAL_ERROR) { + $self->diag("Could not rebless to '$new_class': $EVAL_ERROR"); + } else { + bless $self, $new_class; + $self->note("Reblessed to '$new_class'"); + } + return $self; +} + +# +# Load configuration into $self->{'conf'} hash +# +sub load_config { + my ($self) = @_; + my $conf = $self->get_frozen_config_file_path(); + $self->{conf} = $self->_eval_file($conf); + return; +} + +# +# Build the target frozen config path +# + +sub get_frozen_config_file_path { + my ($self) = @_; + my $filename = sprintf('%s.%s', $self->species(), FROZEN_CONF_SUFFIX); + my $conf = catfile($self->curr_dir(), $filename); + return $conf; +} + +sub _eval_file { + my ($self, $file) = @_; + if ( !-e $file ) { + throw("Required configuration file '$file' does not exist"); + } + my $contents = slurp($file); + my $v = eval $contents; + die "Could not read in configuration file '$file': $EVAL_ERROR" if $EVAL_ERROR; + return $v; +} + +# +# Store $self->{'conf'} hash into a file +# +sub store_config { + my ($self) = @_; + my $conf = $self->get_frozen_config_file_path(); + work_with_file($conf, 'w', sub { + my ($fh) = @_; + local $Data::Dumper::Indent = 2; # we want everything on one line + local $Data::Dumper::Terse = 1; # and we want it without dummy variable names + local $Data::Dumper::Sortkeys = 1; # make stringification more deterministic + local $Data::Dumper::Quotekeys = 1; # conserve some space + local $Data::Dumper::Useqq = 1; # escape the \n and \t correctly + print $fh Dumper($self->{conf}); + return; + }); + return; +} + +# +# Create a set of adaptors based on the $self->{'conf'} hash +# + +sub create_adaptors { + my ($self) = @_; + foreach my $dbtype (keys %{$self->{conf}}) { + $self->create_adaptor($dbtype); + } + return; +} + +sub create_adaptor { + my ($self, $dbtype) = @_; + my $db = $self->{conf}->{$dbtype}; + my $module = $db->{module}; + if(eval "require $module") { + my %args = map { ( "-${_}", $db->{$_} ) } qw(dbname user pass port host driver species group); + if($dbtype eq 'hive') { + $args{"-no_sql_schema_version_check"} = 1; + $args{'-url'} = 'mysql://' . $args{'-user'} . ':' . $args{'-pass'} . '@' . $args{'-host'} . ':' . $args{'-port'} . '/' . $args{'-dbname'}; + } + if($dbtype eq 'funcgen') { + %args = (%args, map { ("-dnadb_${_}", $db->{${_}}) } qw/host user pass port/); + # We wish to select the most recent core database generated by this user's test scripts. + # This amounts to searching for the datase with the same prefix as the funcgen one, with the + # highest timestamp in suffix, i.e. the first element of the set of candidate name in reverse + # alphabetical order. + my $mysql_out; + if ($args{'-pass'}) { + $mysql_out = `mysql -NB -u $args{'-user'} -p$args{'-pass'} -h $args{'-host'} -P $args{'-port'} -e 'show databases'`; + } else { + $mysql_out = `mysql -NB -u $args{'-user'} -h $args{'-host'} -P $args{'-port'} -e 'show databases'`; + } + my @databases = split(/^/, $mysql_out); + my $dnadb_pattern = $args{'-dbname'}; + $dnadb_pattern =~ s/_funcgen_.*/_core_/; + my @core_databases = grep /^$dnadb_pattern/, @databases; + scalar(@core_databases) > 0 || die "Did not find any core database with pattern $dnadb_pattern:\n".join("\n", @databases); + my @sorted_core_databases = sort {$b cmp $a} @core_databases; + my $chosen_database = shift(@sorted_core_databases); + chomp $chosen_database; + $args{'-dnadb_name'} = $chosen_database; + } + my $adaptor = eval{ $module->new(%args) }; + if($EVAL_ERROR) { + $self->diag("!! Could not instantiate $dbtype DBAdaptor: $EVAL_ERROR"); + } + else { + $self->{db_adaptors}->{$dbtype} = $adaptor; + } + } + return; +} + +sub db_conf { + my ($self) = @_; + if(! $self->{db_conf}) { + $self->{db_conf} = $self->get_db_conf($self->curr_dir()); + } + return $self->{db_conf}; +} + +sub dbi_connection { + my ($self) = @_; + if(!$self->{dbi_connection}) { + my $db = $self->_db_conf_to_dbi($self->db_conf(), $self->_dbi_options); + if ( ! defined $db ) { + $self->diag("!! Can't connect to database: ".$DBI::errstr); + return; + } + $self->{dbi_connection} = $db; + } + return $self->{dbi_connection}; +} + +sub disconnect_dbi_connection { + my ($self) = @_; + if($self->{dbi_connection}) { + $self->do_disconnect; + delete $self->{dbi_connection}; + } + return; +} + +sub load_database { + my ($self, $dbtype) = @_; + my $db_conf = $self->db_conf(); + my $databases = $db_conf->{databases}; + my $preloaded = $db_conf->{preloaded} || {}; + my $species = $self->species(); + + if(! $databases->{$species}) { + die "Requested a database for species $species but the MultiTestDB.conf knows nothing about this"; + } + + my $config_hash = { %$db_conf }; + delete $config_hash->{databases}; + $config_hash->{module} = $databases->{$species}->{$dbtype}; + $config_hash->{species} = $species; + $config_hash->{group} = $dbtype; + $self->{conf}->{$dbtype} = $config_hash; + my $dbname = $preloaded->{$species}->{$dbtype}; + my $driver_handle = $self->dbi_connection(); + if($dbname && $self->_db_exists($driver_handle, $dbname)) { + $config_hash->{dbname} = $dbname; + $config_hash->{preloaded} = 1; + } + else { + if(! $dbname) { + $dbname = $self->create_db_name($dbtype); + delete $config_hash->{preloaded}; + } + else { + $config_hash->{preloaded} = 1; + } + + $config_hash->{dbname} = $dbname; + $self->note("Creating database $dbname"); + my %limits = ( 'mysql' => 64, 'pg' => 63 ); + if (my $l = $limits{lc $self->db_conf->{driver}}) { + if (length($dbname) > $l) { + die "Cannot create the database because its name is longer than the maximum the driver allows ($l characters)"; + } + } + my $db = $self->create_and_use_db($driver_handle, $dbname); + + my $base_dir = $self->base_dump_dir($self->curr_dir()); + my $dir_name = catdir( $base_dir, $species, $dbtype ); + $self->load_sql($dir_name, $db, 'table.sql', 'sql'); + $self->load_txt_dumps($dir_name, $dbname, $db); + $self->note("Loaded database '$dbname'"); + } + return; +} + +sub load_databases { + my ($self) = shift; + my $species = $self->species(); + + $self->note("Trying to load [$species] databases"); + # Create a configuration hash which will be frozen to a file + $self->{'conf'} = {}; + + my @db_types = keys %{$self->db_conf()->{databases}->{$species}}; + foreach my $dbtype (@db_types) { + $self->load_database($dbtype); + } + + $self->disconnect_dbi_connection(); + return; +} + +# +# Loads a DB from a single table.sql file or a set of *.sql files +# + +sub load_sql { + my ($self, $dir_name, $db, $override_name, $suffix, $override_must_exist) = @_; + my @files = $self->driver_dump_files($dir_name, $suffix); + + my ($all_tables_sql) = grep { basename($_) eq $override_name } @files; + return if $override_must_exist and not $all_tables_sql; + + my $sql_com = q{}; + if($all_tables_sql) { + @files = ($all_tables_sql); + } + foreach my $sql_file (@files) { + $self->note("Reading SQL from '$sql_file'"); + work_with_file($sql_file, 'r', sub { + my ($fh) = @_; + while(my $line = <$fh>) { + #ignore comments and white-space lines + if($line !~ /^#/ && $line =~ /\S/) { + $sql_com .= $line; + } + } + return; + }); + + } + + $sql_com =~ s/;$//; + my @statements = split( /;/, $sql_com ); + foreach my $sql (@statements) { + $db->do($sql); + } + + return; +} + +sub driver_dump_files { + my ($self, $dir_name, $suffix) = @_; + my $dir = IO::Dir->new($dir_name); + if(! defined $dir) { + $self->diag(" !! Could not open dump directory '$dir_name'"); + return; + } + my $driver_dir_name = catdir($dir_name, $self->db_conf->{driver}); + my $driver_dir = IO::Dir->new($driver_dir_name); + if ($driver_dir) { + $dir = $driver_dir; + $dir_name = $driver_dir_name; + } + my @files = map { catfile($dir_name, $_) } grep { $_ =~ /\.${suffix}$/ } $dir->read(); + $dir->close(); + return (@files); +} + +sub load_txt_dumps { + my ($self, $dir_name, $dbname, $db) = @_; + my $tables = $self->tables($db, $dbname); + foreach my $tablename (@{$tables}) { + my $txt_file = catfile($dir_name, $tablename.'.txt'); + if(! -f $txt_file || ! -r $txt_file) { + next; + } + $self->do_pre_sql($dir_name, $tablename, $db); + $db = $self->load_txt_dump($txt_file, $tablename, $db); # load_txt_dump may re-connect $db! + $self->do_post_sql($dir_name, $tablename, $db); + } + return; +} + +sub do_pre_sql { + my ($self, $dir_name, $tablename, $db) = @_; + $self->load_sql($dir_name, $db, "$tablename.pre", 'pre', 1); + return; +} + +sub do_post_sql { + my ($self, $dir_name, $tablename, $db) = @_; + $self->load_sql($dir_name, $db, "$tablename.post", 'post', 1); + return; +} + +sub tables { + my ($self, $db, $dbname) = @_; + my @tables; + my $sth = $db->table_info(undef, $self->_schema_name($dbname), q{%}, 'TABLE'); + while(my $array = $sth->fetchrow_arrayref()) { + push(@tables, $array->[2]); + } + return \@tables; +} + +sub get_DBAdaptor { + my ($self, $type, $die_if_not_found) = @_; + die "No type specified" if ! $type; + if(!$self->{db_adaptors}->{$type}) { + $self->diag("!! Database adaptor of type $type is not available"); + if($die_if_not_found) { + die "adaptor for $type is not available"; + } + return; + } + return $self->{db_adaptors}->{$type}; +} + +=head2 hide + + Arg [1] : string $dbtype + The type of the database containing the temporary table + Arg [2] : string $table + The name of the table to hide + Example : $multi_test_db->hide('core', 'gene', 'transcript', 'exon'); + Description: Hides the contents of specific table(s) in the specified + database. The table(s) are first renamed and an empty + table are created in their place by reading the table + schema file. + Returntype : none + Exceptions : Thrown if the adaptor for dbtype is not available + Thrown if both arguments are not defined + Warning if there is already a temporary ("hidden") + version of the table + Warning if a temporary ("hidden") version of the table + Cannot be created because its schema file cannot be read + Caller : general + +=cut + +sub hide { + my ( $self, $dbtype, @tables ) = @_; + + die("dbtype and table args must be defined\n") if ! $dbtype || !@tables; + my $adaptor = $self->get_DBAdaptor($dbtype, 1); + + foreach my $table (@tables) { + if ( $self->{'conf'}->{$dbtype}->{'hidden'}->{$table} ) { + $self->diag("!! Table '$table' is already hidden and cannot be hidden again"); + next; + } + + my $hidden_name = "_hidden_$table"; + # Copy contents of table into a temporary table + $adaptor->dbc->do("CREATE TABLE $hidden_name AS SELECT * FROM $table"); + # Delete the contents of the original table + $adaptor->dbc->do("DELETE FROM $table"); + # Update the temporary table configuration + $self->{'conf'}->{$dbtype}->{'hidden'}->{$table} = $hidden_name; + + $self->note("The table ${table} has been hidden in ${dbtype}"); + } + return; +} + +=head2 restore + + Arg [1] : (optional) $dbtype + The dbtype of the table(s) to be restored. If not + specified all hidden tables in all the databases are + restored. + Arg [2] : (optional) @tables + The name(s) of the table to be restored. If not + specified all hidden tables in the database $dbtype are + restored. + Example : $self->restore('core', 'gene', 'transcript', 'exon'); + Description: Restores a list of hidden tables. The current version of + the table is discarded and the hidden table is renamed. + Returntype : none + Exceptions : Thrown if the adaptor for a dbtype cannot be obtained + Caller : general + +=cut + +sub restore { + my ( $self, $dbtype, @tables ) = @_; + + if ( !$dbtype ) { + # Restore all of the tables in every dbtype + foreach my $dbtype ( keys %{ $self->{'conf'} } ) { + $self->restore($dbtype); + } + + # Lose the hidden table details + delete $self->{'conf'}->{'hidden'}; + + return; + } + + my $adaptor = $self->get_DBAdaptor($dbtype, 1); + + if ( !@tables ) { + # Restore all of the tables for this database + @tables = keys %{ $self->{'conf'}->{$dbtype}->{'hidden'} }; + } + + foreach my $table (@tables) { + my $hidden_name = $self->{'conf'}->{$dbtype}->{'hidden'}->{$table}; + + # Delete current contents of table + $adaptor->dbc->do("DELETE FROM $table"); + # Copy contents of tmp table back into main table + $adaptor->dbc->do("INSERT INTO $table SELECT * FROM $hidden_name"); + # Drop temp table + $adaptor->dbc->do("DROP TABLE $hidden_name"); + # Delete value from hidden table configuration + delete $self->{'conf'}->{$dbtype}->{'hidden'}->{$table}; + + $self->note("The table ${table} has been restored in ${dbtype}"); + } + return; +} + +=head2 save + + Arg [1] : string $dbtype + The type of the database containing the hidden/saved table + Arg [2] : string $table + The name of the table to save + Example : $multi_test_db->save('core', 'gene', 'transcript', 'exon'); + Description: Saves the contents of specific table(s) in the specified db. + The table(s) are first renamed and an empty table are created + in their place by reading the table schema file. The contents + of the renamed table(s) are then copied back into the newly + created tables. The method piggy-backs on the hide method + and simply adds in the copying/insertion call. + Returntype : none + Exceptions : thrown if the adaptor for dbtype is not available + warning if a table cannot be copied if the hidden table does not + exist + Caller : general + +=cut + +sub save { + my ( $self, $dbtype, @tables ) = @_; + + # Use the hide method to build the basic tables + $self->hide( $dbtype, @tables ); + + my $adaptor = $self->get_DBAdaptor($dbtype, 1); + + foreach my $table (@tables) { + my $hidden_name = ''; + # Only do if the hidden table exists + if ( $self->{'conf'}->{$dbtype}->{'hidden'}->{$table} ) { + $hidden_name = "_hidden_$table"; + # Copy the data from the hidden table into the new table + $adaptor->dbc->do("insert into $table select * from $hidden_name"); + $self->note("The table ${table} contents has been saved in ${dbtype}"); + } + else { + $self->diag("!! Hidden table '$hidden_name' does not exist so saving is not possible"); + } + } + return; +} + +=head2 save_permanent + + Arg [1] : string $dbtype + The type of the database containing the hidden/saved table + Arg [2-N] : string $table + The name of the table to save + Example : $multi_test_db->save_permanent('core', 'gene', 'transcript'); + Description: Saves the contents of specific table(s) in the specified db. + The backup tables are not deleted by restore() or cleanup(), so + this is mainly useful for debugging. + Returntype : none + Exceptions : thrown if the adaptor for dbtype is not available + warning if a table cannot be copied if the hidden table does not + exist + Caller : general + +=cut + +sub save_permanent { + my ( $self, $dbtype, @tables ) = @_; + + if ( !( $dbtype && @tables ) ) { + die("dbtype and table args must be defined\n"); + } + + my $adaptor = $self->get_DBAdaptor($dbtype, 1); + + $self->{'conf'}->{$dbtype}->{'_counter'}++; + + foreach my $table (@tables) { + my $hidden_name = "_bak_$table" . "_" . $self->{'conf'}->{$dbtype}->{'_counter'}; + $adaptor->dbc->do("CREATE TABLE $hidden_name AS SELECT * FROM $table"); + $self->note("The table ${table} has been permanently saved in ${dbtype}"); + } + return; +} + +sub _db_exists { + my ( $self, $db, $db_name ) = @_; + return 0 if ! $db_name; + my $db_names = $db->selectall_arrayref('SHOW DATABASES'); + foreach my $db_name_ref (@{$db_names}) { + return 1 if $db_name_ref->[0] eq $db_name; + } + return 0; +} + +sub compare { + my ( $self, $dbtype, $table ) = @_; + $self->diag('!! Compare method not yet implemented'); + return; +} + +sub species { + my ( $self, $species ) = @_; + $self->{species} = $species if $species; + return $self->{species}; +} + +sub curr_dir { + my ( $self, $cdir ) = @_; + $self->{'_curr_dir'} = $cdir if $cdir; + return $self->{'_curr_dir'}; +} + +sub create_db_name { + my ( $self, $dbtype ) = @_; + + my @localtime = localtime(); + my $date = strftime '%Y%m%d', @localtime; + my $time = strftime '%H%M%S', @localtime; + + my $species = $self->species(); + + # Create a unique name using host and date / time info + my $db_name = sprintf( + '%s_test_db_%s_%s_%s_%s', + ( exists $ENV{'LOGNAME'} ? $ENV{'LOGNAME'} : $ENV{'USER'} ), + $species, $dbtype, $date, $time + ); + if (my $path = $self->_db_path($self->dbi_connection)) { + $db_name = catfile($path, $db_name); + } + return $db_name; +} + +sub cleanup { + my ($self) = @_; + + # Remove all of the handles on db_adaptors + %{$self->{db_adaptors}} = (); + + # Delete each of the created temporary databases + foreach my $dbtype ( keys %{ $self->{conf} } ) { + my $db_conf = $self->{conf}->{$dbtype}; + next if $db_conf->{preloaded}; + my $db = $self->_db_conf_to_dbi($db_conf); + my $dbname = $db_conf->{'dbname'}; + $self->note("Dropping database $dbname"); + $self->_drop_database($db, $dbname); + } + + my $conf_file = $self->get_frozen_config_file_path(); + # Delete the frozen configuration file + if ( -e $conf_file && -f $conf_file ) { + $self->note("Deleting $conf_file"); + unlink $conf_file; + } + return; +} + +sub DESTROY { + my ($self) = @_; + + if ( $ENV{'RUNTESTS_HARNESS'} ) { + # Restore tables, do nothing else we want to use the database + # for the other tests as well + $self->note('Leaving database intact on server'); + if(!$ENV{'RUNTESTS_HARNESS_NORESTORE'}) { + $self->restore(); + } + } else { + # We are runnning a stand-alone test, cleanup created databases + $self->note('Cleaning up...'); + + # Restore database state since we may not actually delete it in + # the cleanup - it may be defined as a preloaded database + $self->restore(); + $self->cleanup(); + } + return; +} + +1; diff --git a/modules/Bio/EnsEMBL/Test/MultiTestDB/SQLite.pm b/modules/Bio/EnsEMBL/Test/MultiTestDB/SQLite.pm new file mode 100644 index 0000000..88bc7d6 --- /dev/null +++ b/modules/Bio/EnsEMBL/Test/MultiTestDB/SQLite.pm @@ -0,0 +1,147 @@ +=head1 LICENSE + +Copyright [1999-2013] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=cut + +package Bio::EnsEMBL::Test::MultiTestDB::SQLite; + +=pod + +=head1 NAME + +Bio::EnsEMBL::Test::MultiTestDB::SQLite + +=head1 DESCRIPTION + +SQLite specifics for Bio::EnsEMBL::Test::MultiTestDB. + +Used automatically, as determined by the 'driver' setting in MultiTestDB.conf. + +=cut + +use strict; +use warnings; + +use English qw(-no_match_vars); +use File::Basename; +use File::Path qw(make_path); +use File::Spec::Functions; # catfile + +use base 'Bio::EnsEMBL::Test::MultiTestDB'; + +sub load_txt_dump { + my ($self, $txt_file, $tablename, $db) = @_; + + $db->disconnect; + + my $db_type = basename(dirname($txt_file)); # yuck!! + my $db_file = $self->{conf}->{$db_type}->{dbname}; # yuck, but at least it's there + my $command = sprintf('.import %s %s', $txt_file, $tablename); + system('sqlite3', '-separator', "\t", $db_file, $command) == 0 + or die "sqlite3 import of '$txt_file' failed: $?"; + + $db = $self->_do_connect($db_file); + + # NULL processing + my $sth = $db->column_info(undef, 'main', $tablename, '%'); + my $cols = $sth->fetchall_arrayref({}); + foreach my $col (@$cols) { + if ($col->{NULLABLE} == 1) { + my $colname = $col->{COLUMN_NAME}; + my $up_sth = $db->prepare(sprintf( + 'UPDATE %s SET %s = NULL WHERE %s IN ("NULL", "\N")', + $tablename, $colname, $colname)); + my $rows = $up_sth->execute; + $self->note("Table $tablename, column $colname: set $rows rows to NULL") if $rows > 0; + } + } + + return $db; +} + +our %dbi; + +sub create_and_use_db { + my ($self, $db, $dbname) = @_; + return $dbi{$dbname} if $dbi{$dbname}; + + my $create_db = $self->_do_connect($dbname); + if(! $create_db) { + $self->note("!! Could not create database [$dbname]"); + return; + } + return $dbi{$dbname} = $create_db; +} + +sub _do_connect { + my ($self, $dbname) = @_; + + my $locator = sprintf('DBI:SQLite:dbname=%s', $dbname); + my $dbh = DBI->connect($locator, undef, undef, { RaiseError => 1 } ); + return $dbi{$dbname} = $dbh; +} + +sub _db_conf_to_dbi { + my ($self, $db_conf, $options) = @_; + my $dbdir = $db_conf->{dbdir}; + unless ($dbdir) { + $self->diag("!! Must specify dbdir for SQLIte files"); + return; + } + make_path($dbdir, {error => \my $err}); + if (@$err) { + $self->diag("!! Couldn't create path '$dbdir'"); + return; + } + return { + db_conf => $db_conf, + options => $options, + }; +} + +sub _dbi_options { + my $self = shift; + return undef; +} + +sub _schema_name { + my ($self, $dbname) = @_; + return 'main'; +} + +sub _db_path { + my ($self, $driver_handle) = @_; + return $driver_handle->{db_conf}->{dbdir}; +} + +sub _drop_database { + my ($self, $db, $dbname) = @_; + + eval { unlink $dbname }; + $self->diag("Could not drop database $dbname: $EVAL_ERROR") if $EVAL_ERROR; + + return; +} + +sub do_disconnect { + my ($self) = @_; + foreach my $dbname ( keys %dbi ) { + $dbi{$dbname}->disconnect; + } + return; +} + +1; diff --git a/modules/Bio/EnsEMBL/Test/MultiTestDB/mysql.pm b/modules/Bio/EnsEMBL/Test/MultiTestDB/mysql.pm new file mode 100644 index 0000000..c8fa1f9 --- /dev/null +++ b/modules/Bio/EnsEMBL/Test/MultiTestDB/mysql.pm @@ -0,0 +1,106 @@ +=head1 LICENSE + +Copyright [1999-2013] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=cut + +package Bio::EnsEMBL::Test::MultiTestDB::mysql; + +=pod + +=head1 NAME + +Bio::EnsEMBL::Test::MultiTestDB::mysql + +=head1 DESCRIPTION + +MySQL specifics for Bio::EnsEMBL::Test::MultiTestDB. + +Used automatically, as determined by the 'driver' setting in MultiTestDB.conf. + +=cut + +use strict; +use warnings; + +use English qw(-no_match_vars); + +use base 'Bio::EnsEMBL::Test::MultiTestDB'; + +sub load_txt_dump { + my ($self, $txt_file, $tablename, $db) = @_; + my $load = sprintf(q{LOAD DATA LOCAL INFILE '%s' INTO TABLE `%s` FIELDS ESCAPED BY '\\\\'}, $txt_file, $tablename); + $db->do($load); + return $db; +} + +sub create_and_use_db { + my ($self, $db, $dbname) = @_; + my $create_db = $db->do("CREATE DATABASE $dbname"); + if(! $create_db) { + $self->note("!! Could not create database [$dbname]"); + return; + } + + $db->do('use '.$dbname); + return $db; +} + +sub _db_conf_to_dbi { + my ($self, $db_conf, $options) = @_; + my %params = (host => $db_conf->{host}, port => $db_conf->{port}); + %params = (%params, %{$options}) if $options; + my $param_str = join(q{;}, map { $_.'='.$params{$_} } keys %params); + my $locator = sprintf('DBI:%s:%s', $db_conf->{driver}, $param_str); + my $db = DBI->connect( $locator, $db_conf->{user}, $db_conf->{pass}, { RaiseError => 1 } ); + return $db if $db; + $self->diag("Can't connect to database '$locator': ". $DBI::errstr); + return; +} + +sub _dbi_options { + my $self = shift; + return {mysql_local_infile => 1}; +} + +sub _schema_name { + my ($self, $dbname) = @_; + return $dbname; +} + +sub _db_path { + my ($self, $driver_handle) = @_; + return; +} + +sub _drop_database { + my ($self, $db, $dbname) = @_; + + eval {$db->do("DROP DATABASE $dbname");}; + $self->diag("Could not drop database $dbname: $EVAL_ERROR") if $EVAL_ERROR; + + $db->disconnect(); + + return; +} + +sub do_disconnect { + my ($self) = @_; + my $db = $self->dbi_connection(); + $db->disconnect; + return; +} + +1; diff --git a/modules/Bio/EnsEMBL/Test/RunPipeline.pm b/modules/Bio/EnsEMBL/Test/RunPipeline.pm new file mode 100644 index 0000000..b998c99 --- /dev/null +++ b/modules/Bio/EnsEMBL/Test/RunPipeline.pm @@ -0,0 +1,444 @@ +=head1 LICENSE + +Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=cut + +package Bio::EnsEMBL::Test::RunPipeline; + +=pod + +=head1 NAME + +Bio::EnsEMBL::Test::RunPipeline + +=head1 SYNOPSIS + +use Bio::EnsEMBL::Test::MultiTestDB; +use Bio::EnsEMBL::Test::RunPipeline; + +my $hive = Bio::EnsEMBL::Test::MultiTestDB->new('hive'); +my $pipeline = Bio::EnsEMBL::Test::RunPipeline->new( + $hive->get_DBAdaptor('hive'), 'Bio::EnsEMBL::PipeConfig::My_conf', '-options'); + +$pipeline->run(); + +=head1 DESCRIPTION + +This module automatically runs the specified pipeline on a test database. The module +is responsible for + +=over 8 + +=item Setting up ENSEMBL_CVS_ROOT_DIR + +=item Setting up PATH to point to ensembl-hive/scripts + +=item Writing the contents of Bio::EnsEMBL::Registry to a tmp file + +=item Initalising the pipeline (can cause entire pipeline bail out) + +=item Running beekeeper locally (can cause entire pipeline bail out) + +=back + +You are expected to provide + +=over 8 + +=item A DBAdaptor instance pointing to a possible hive DB + +=item Any options required for init_pipeline.pl to run (including target tmp dirs) + +=item The module to run + +=item Any fake binaries already on the PATH before running the pipeline + +=back + +=cut + +use strict; +use warnings; + +use English qw(-no_match_vars); +use File::Temp; +use File::Spec; +use File::Spec::Functions; +use Bio::EnsEMBL::Test::MultiTestDB; + +use Bio::EnsEMBL::Registry; + +use base 'Test::Builder::Module'; + +$OUTPUT_AUTOFLUSH = 1; + +=head2 init_pipeline + +Runs init_pipeline.pl creating the hive DB + +=cut + +sub init_pipeline { + my ($self, $pipeline) = @_; + + my $dba = $self->pipe_db(); + my $dbc = $dba->dbc(); + my $run = sprintf( + "init_pipeline.pl %s -registry %s -pipeline_db -host=%s -pipeline_db -port=%s -pipeline_name=%s -password '%s' -pipeline_db -dbname=%s -user=%s %s", + $pipeline, $self->reg_file(), $dbc->host(), $dbc->port(), $dbc->dbname(), $dbc->password(), $dbc->dbname(), $dbc->user(), $self->pipe_options + ); + $self->builder()->note("Initiating pipeline"); + $self->builder()->note($run); + my $status = system($run); + if ($? != 0 ) { + $status = $? >> 8; + return $status; + } + return $status; +} + +=head2 run_beekeeper_loop + +Runs beekeeper in a loop. You can control the sleep time using + + $self->beekeeper_sleep() + +=cut + +sub run_beekeeper_loop { + my ($self) = @_; + my $sleep = $self->beekeeper_sleep(); + return $self->run_beekeeper('-no_analysis_stats -loop -sleep '.$sleep); +} + +=head2 run_beekeeper_final_status + +Runs beekeeper to print out the final analysis status + + $self->run_beekeeper_final_status() + +=cut + +sub run_beekeeper_final_status { + my ($self) = @_; + return $self->run_beekeeper(); +} + +=head2 run_beekeeper_sync + +Syncs the hive + +=cut + +sub run_beekeeper_sync { + my ($self) = @_; + return $self->run_beekeeper('-sync'); +} + +=head2 run_beekeeper + +Runs beekeeper with any given cmd line options. Meadow and max workers are controlled via + + $self->meadow() + $self->max_workers() + +=cut + +sub run_beekeeper { + my ($self, $cmd_line_options) = @_; + $cmd_line_options ||= q{}; + my $dba = $self->pipe_db(); + my $url = $self->hive_url(); + my $meadow = $self->meadow(); + my $max_workers = $self->max_workers(); + my $run = "beekeeper.pl -url $url -meadow $meadow -total_running_workers_max $max_workers -reg_conf " . + $self->reg_file() . ' '. $cmd_line_options; + $self->builder()->note("Starting pipeline"); + $self->builder()->note($run); + my $status = system($run); + if ($status != 0 ) { + $status = $CHILD_ERROR >> 8; + } + return $status; +} + +=head2 new + +Create a new module. See SYNOPSIS for details on how to use + +=cut + +sub new { + my ($class, $pipeline, $options) = @_; + + $class = ref($class) || $class; + my $self = bless {}, $class; + + # Go and grab the current directory and store it away + my ( $package, $file, $line ) = caller; + my $curr_dir = ( File::Spec->splitpath($file) )[1]; + if (!defined($curr_dir) || $curr_dir eq q{}) { + $curr_dir = curdir(); + } + else { + $curr_dir = File::Spec->rel2abs($curr_dir); + } + + $self->curr_dir($curr_dir); + $self->pipeline($pipeline); + $self->pipe_options($options); + + $self->setup_environment(); + + #Intalise the hive database + $self->hive_multi_test_db(); + + return $self; +} + +=head2 add_fake_binaries + +Allows you to add directories held in the ensembl-xxxx/modules/t directory +(held in curr_dir()) which hold fake binaries for a pipeline. + +=cut + +sub add_fake_binaries { + my ($self, $fake_binary_dir) = @_; + my $binary_dir = File::Spec->catdir($self->curr_dir(), $fake_binary_dir); + $binary_dir = File::Spec->rel2abs($binary_dir); + $ENV{PATH} = join(q{:}, $binary_dir, $ENV{PATH}); + $self->builder->note('Fake binary dir added. PATH is now: '.$ENV{PATH}); + return; +} + +=head2 run + +Sets the pipeline going. This includes registry writing, initalisation, syncing, and running. See +SYNPOSIS for more information. + +=cut + +sub run { + my ($self) = @_; + + my $pipeline = $self->pipeline(); + + #Write the registry out + $self->write_registry(); + + #Run the init + my $init = $self->init_pipeline($pipeline); + if ($init != 0) { $self->builder()->BAIL_OUT("init_pipeline.pl failed with error code: ".$init); } + + #disconnect from the hive DB + $self->pipe_db->dbc->disconnect_if_idle(); + + #Sync and loop the pipeline + my $bees_sync = $self->run_beekeeper_sync(); + if ($bees_sync != 0) { $self->builder()->BAIL_OUT("beekeeper.pl sync failed with error code: ".$bees_sync); } + my $bees_loop = $self->run_beekeeper_loop(); + if ($bees_loop != 0) { $self->builder()->BAIL_OUT("beekeeper.pl loop failed with error code: ".$bees_loop); } + + return $self; +} + +=head2 setup_environment + +When run this will setup the ENSEMBL_CVS_ROOT_DIR if not already set and +will add the PATH to ensembl-hive/scripts + +=cut + +sub setup_environment { + my ($self) = @_; + my $curr_dir = $self->curr_dir(); + my $up = File::Spec->updir(); + + my $cvs_root_dir; + #Setup the CVS ROOT DIR ENV if not already there + if(! exists $ENV{ENSEMBL_CVS_ROOT_DIR}) { + #Curr dir will be a t dir. Ascend up until we hit a ensembl-hive dir. Break after 3 ups + #since that's the normal location + $cvs_root_dir = $self->curr_dir(); + my $found = 0; + foreach my $index (1..3) { + $cvs_root_dir = File::Spec->catdir($cvs_root_dir, $up); + if( -e File::Spec->catdir($cvs_root_dir, 'ensembl-hive')) { + $found = 1; + last; + } + } + if(! $found) { + $self->builder()->BAIL_OUT("Cannot continue since we could not find a ensembl-hive directory"); + } + $ENV{ENSEMBL_CVS_ROOT_DIR} = $cvs_root_dir; + } + else { + $cvs_root_dir = $ENV{ENSEMBL_CVS_ROOT_DIR}; + } + + #Set the PATH + my $hive_script_dir = File::Spec->catdir($cvs_root_dir, 'ensembl-hive', 'scripts'); + $ENV{PATH} = join(q{:}, $hive_script_dir, $ENV{PATH}); + $self->builder->note('Setting up hive. PATH is now: '.$ENV{PATH}); + + #Stop registry from moaning + Bio::EnsEMBL::Registry->no_version_check(1); + + return; +} + +=head2 write_registry + +Write the current contents of the Registry out to a Perl file + +=cut + +sub write_registry { + my ($self, $dba) = @_; + my $fh = File::Temp->new(); + $fh->unlink_on_destroy(1); + $self->registry_file($fh); + my %used_namespaces; + + my $adaptors = Bio::EnsEMBL::Registry->get_all_DBAdaptors(); + my @final_adaptors = grep { $_ !~ 'Hive' } @{$adaptors}; + if(! @final_adaptors) { + print $fh "1;\n"; + return; + } + + print $fh "{\n"; + foreach my $adaptor (@final_adaptors) { + my $namespace = ref($adaptor); + if(! exists $used_namespaces{$namespace}) { + print $fh "use $namespace;\n"; + $used_namespaces{$namespace} = 1; + } + my $dbc = $adaptor->dbc(); + print $fh "$namespace->new(\n"; + print $fh "-HOST => '".$dbc->host."',\n"; + print $fh "-PORT => '".$dbc->port."',\n"; + print $fh "-USER => '".$dbc->username."',\n"; + print $fh "-PASS => '".$dbc->password."',\n"; + print $fh "-DBNAME => '" . $dbc->dbname . "',\n"; + print $fh "-SPECIES => '" . $adaptor->species . "',\n"; + print $fh "-GROUP => '". $adaptor->group."',\n"; + print $fh ");\n"; + } + + print $fh "}\n"; + print $fh "1;\n"; + + $fh->close(); + return; +} + +=head2 _drop_hive_database + +Remove the current hive DB + +=cut + +sub _drop_hive_database { + my ($self) = @_; + my $dba = $self->pipe_db(); + my $dbc = $dba->dbc(); + $dbc->do('drop database '.$dbc->dbname()); + return; +} + +=head2 hive_url + +Generate a hive compatible URL from the object's hive dbadaptor + +=cut + +sub hive_url { + my ($self) = @_; + my $dba = $self->pipe_db(); + my $dbc = $dba->dbc(); + my $url = sprintf( + "mysql://%s:%s@%s:%s/%s", + $dbc->username(), $dbc->password(), $dbc->host(), $dbc->port(), $dbc->dbname() + ); + return $url; +} + +sub reg_file { + my ($self) = @_; + return $self->registry_file()->filename(); +} + +sub registry_file { + my ($self, $registry_file) = @_; + $self->{registry_file} = $registry_file if $registry_file; + return $self->{registry_file}; +} + +sub pipe_db { + my ($self, $db) = @_; + return $self->hive_multi_test_db->get_DBAdaptor('hive'); +} + +sub pipeline { + my ( $self, $pipeline ) = @_; + $self->{pipeline} = $pipeline if $pipeline; + return $self->{pipeline}; +} + +sub pipe_options { + my ( $self, $options ) = @_; + $self->{options} = $options if $options; + return $self->{options} || q{}; +} + +sub curr_dir { + my ( $self, $cdir ) = @_; + $self->{'_curr_dir'} = $cdir if $cdir; + return $self->{'_curr_dir'}; +} + +sub meadow { + my ($self, $meadow) = @_; + $self->{meadow} = $meadow if $meadow; + return $self->{meadow} || 'LOCAL'; +} + +sub beekeeper_sleep { + my ($self, $beekeeper_sleep) = @_; + $self->{beekeeper_sleep} = $beekeeper_sleep if $beekeeper_sleep; + return $self->{beekeeper_sleep} || 0.1; +} + +sub max_workers { + my ($self, $max_workers) = @_; + $self->{max_workers} = $max_workers if $max_workers; + return $self->{max_workers} || 2; +} + +sub hive_multi_test_db { + my ($self) = @_; + if(! $self->{hive_multi_test_db}) { + $self->{hive_multi_test_db} = Bio::EnsEMBL::Test::MultiTestDB->new('hive', $self->curr_dir()); + #have to drop the hive DB first. Bit backwards tbh but hive needs to create the DB + $self->_drop_hive_database(); + } + return $self->{hive_multi_test_db}; +} + +1; diff --git a/modules/Bio/EnsEMBL/Test/StaticHTTPD.pm b/modules/Bio/EnsEMBL/Test/StaticHTTPD.pm new file mode 100644 index 0000000..4197b0e --- /dev/null +++ b/modules/Bio/EnsEMBL/Test/StaticHTTPD.pm @@ -0,0 +1,109 @@ +=head1 LICENSE + +Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=cut + +package Bio::EnsEMBL::Test::StaticHTTPD; + +=pod + +=head1 NAME + +Bio::EnsEMBL::Test::StaticHTTPD; + +=head1 SYNOPSIS + + my $root_dir = '/path/to/static/files'; + my $httpd = Bio::EnsEMBL::Test::StaticHTTPD->new($root_dir); + my $endppoint = $httpd->endpoint; + + ok(do_GET($endpoint . '/file.txt'), 'Basic successful fetch'); + +=head1 DESCRIPTION + +This module creates a simple HTTPD daemon that returns static files in the +root_dir if they exist, return content-type will always be text/plain. + +If the file doesn't exist in the root_dir, a 404 error code will be returned. + +The HTTPD daemon is destroyed on exit. + +=cut + +use strict; +use warnings; + +use Test::More; +use Test::Exception; +use File::Spec; + +use Bio::EnsEMBL::Utils::IO qw/slurp/; + +require_ok('Test::Fake::HTTPD'); + +use base 'Test::Builder::Module'; + +=head2 new + + Arg[1] : string $root_dir + The directory where files to be returned by + the HTTPD live, similar to DocumentRoot in Apache + Arg[2] : int $timeout + Optional argument for httpd timeout, defaults + to 30 seconds + + Returntype : httpd instance + +=cut + +sub new { + my ($self, $root_dir, $timeout) = @_; + + # Do we have a valid DocumentRoot + ok( -d $root_dir, 'Root dir for HTTPD is valid'); + + # Create the new HTTPD instance + my $httpd = Test::Fake::HTTPD->new( + timeout => (defined $timeout ? $timeout : 30), + ); + + # Stash the root_dir for the run subroutine + $ENV{httpd_root_dir} = $root_dir; + + # Callback routine for serving requests + $httpd->run(sub { + my ($req) = @_; + my $uri = $req->uri; + + # Make the file path based on our DocumentRoot and requested path + my $file = File::Spec->catpath(undef, $ENV{httpd_root_dir}, $uri); + + return do { + if( -f $file ) { + my $file_contents = slurp($file); + [ 200, [ 'Content-Type', 'text/pain'], [ $file_contents ] ]; + } else { + [ 404, [ 'Content-type', 'text/plain' ], ['File does not exist']]; + } + } + }); + + ok( defined $httpd, 'Got a web server' ); + diag( sprintf "You can connect to your server at %s.\n", $httpd->host_port ); + return $httpd; +} + +1; diff --git a/modules/Bio/EnsEMBL/Test/TestUtils.pm b/modules/Bio/EnsEMBL/Test/TestUtils.pm new file mode 100644 index 0000000..df70be4 --- /dev/null +++ b/modules/Bio/EnsEMBL/Test/TestUtils.pm @@ -0,0 +1,635 @@ +=head1 LICENSE + +Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +=cut + +package Bio::EnsEMBL::Test::TestUtils; + + +=head1 NAME + +Bio::EnsEMBL::Test::TestUtils - Utilities for testing the EnsEMBL Perl API + +=head1 SYNOPSIS + + debug("Testing Bio::EnsEMBL::Slice->foo() method"); + ok( &test_getter_setter( $object, 'foo', 'value' ) ); + count_rows( $human_dba, "gene" ); + +=head1 DESCRIPTION + +This module contains a several utilities for testing the EnsEMBL Perl API. + +=head1 EXPORTS + +This modules exports the following methods by default: + + - debug + - test_getter_setter + - count_rows + - find_circular_refs + - dump_vars + +=head1 CONTACT + +Email questions to the ensembl developer mailing list +<http://lists.ensembl.org/mailman/listinfo/dev> + +=head1 METHODS + +=cut + +use strict; +use warnings; + +use Exporter; + + +use Devel::Peek; +use Devel::Cycle; +use Error qw(:try); +use IO::String; +use PadWalker qw/peek_our peek_my/; +use Test::Builder::Module; +use Bio::EnsEMBL::Utils::IO qw/gz_work_with_file work_with_file/; + +use vars qw( @ISA @EXPORT ); + +@ISA = qw(Exporter Test::Builder::Module); +@EXPORT = qw( + debug + test_getter_setter + count_rows + find_circular_refs + capture_std_streams + is_rows + warns_like + mock_object + ok_directory_contents + is_file_line_count + has_apache2_licence + all_has_apache2_licence + all_source_code +); + +=head2 test_getter_setter + + Arg [1] : Object $object + The object to test the getter setter on + Arg [2] : string $method + The name of the getter setter method to test + Arg [3] : $test_val + The value to use to test the set behavior of the method. + Example : ok(&TestUtils::test_getter_setter($object, 'type', 'value')); + Description: Tests a getter setter method by attempting to set a value + and verifying that the newly set value can be retrieved. + The old value of the the attribute is restored after the + test (providing the method functions correctly). + Returntype : boolean - true value on success, false on failure + Exceptions : none + Caller : test scripts + +=cut + +sub test_getter_setter +{ + my ( $object, $method, $test_val ) = @_; + + my $ret_val = 0; + + # Save the old value + my $old_val = $object->$method(); + + $object->$method($test_val); + + # Verify value was set + $ret_val = + ( ( !defined($test_val) && !defined( $object->$method() ) ) + || ( $object->$method() eq $test_val ) ); + + # Restore the old value + $object->$method($old_val); + + return $ret_val; +} + +=head2 debug + + Arg [...] : array of strings to be printed + Example : debug("Testing Bio::EnsEMBL::Slice->foo() method") + Description: Prints a debug message on the standard error console + if the verbosity has not been swithed off + Returntype : none + Exceptions : none + Caller : test scripts + +=cut + +sub debug { + Bio::EnsEMBL::Test::TestUtils->builder->note(@_); +} + +=head2 count_rows + + Arg [1] : Bio::EnsEMBL::DBSQL::DBAdaptor $dba + Arg [2] : string $tablename + Arg [3] : string $constraint + Arg [4] : Array $params + Example : count_rows($human_dba, "gene"); + Example : count_rows($human_dba, "gene", 'where analysis_id=?', [1028]); + Description: Returns the number of rows in the table $tablename + Returntype : int + Exceptions : none + Caller : test scripts + +=cut + +sub count_rows +{ + my $db = shift; + my $tablename = shift; + my $constraint = shift; + my $params = shift; + + $constraint ||= q{}; + $params ||= []; + + my $sth = $db->dbc->prepare("select count(*) from $tablename $constraint"); + + $sth->execute(@{$params}); + + my ($count) = $sth->fetchrow_array(); + + return $count; +} + +=head2 is_rows + + Arg [1] : int $expected_count + Arg [2] : Bio::EnsEMBL::DBSQL::DBAdaptor $dba + Arg [3] : string $tablename + Arg [4] : string $constraint + Arg [5] : Array $params + Example : is_rows(20, $human_dba, "gene"); + Example : is_rows(0, $human_dba, "gene", 'where analysis_id =?', [1025]); + Description: Asserts the count returned is the same as the expected value + Returntype : None + Exceptions : None + Caller : test scripts + +=cut + +sub is_rows { + my ($expected_count, $db, $tablename, $constraint, $params) = @_; + $constraint ||= q{}; + my $actual_count = count_rows($db, $tablename, $constraint, $params); + my $joined_params = join(q{, }, @{($params || [] )}); + my $name = sprintf(q{Asserting row count is %d from %s with constraint '%s' with params [%s]}, + $expected_count, $tablename, $constraint, $joined_params + ); + return __PACKAGE__->builder->is_num($actual_count, $expected_count, $name); +} + +=head2 capture_std_streams + + Arg [1] : CodeRef callback to execute which will attempt to write to STD streams + Arg [2] : Boolean 1-dump variables + Example : capture_std_streams(sub { + my ($stdout_ref, $stderr_ref) = @_; + print 'hello'; + is(${$stdout_ref}, 'hello', 'STDOUT contains expected';) + }); + Description : Provides access to the STDOUT and STDERR streams captured into + references. This allows you to assert code which writes to + these streams but offers no way of changing their output + stream. + Returntype : None + Exceptions : None + Caller : test scripts + +=cut + +sub capture_std_streams { + my ($callback) = @_; + + my ($stderr_string, $stdout_string) = (q{}, q{}); + + my $new_stderr = IO::String->new(\$stderr_string); + my $old_stderr_fh = select(STDERR); + local *STDERR = $new_stderr; + + my $new_stdout = IO::String->new(\$stdout_string); + my $old_stdout_fh = select(STDOUT); + local *STDOUT = $new_stdout; + + $callback->(\$stdout_string, \$stderr_string); + + return; +} + +=head2 warns_like + + Arg [1] : CodeRef code to run; can be a code ref or a block since we can prototype into a code block + Arg [2] : Regex regular expression to run against the thrown warnings + Arg [3] : String message to print to screen + Example : warns_like { do_something(); } qr/^expected warning$/, 'I expect this!'; + warns_like(sub { do_something(); }, qr/^expected$/, 'I expect this!'); + Description: Attempts to run the given code block and then regexs the captured + warnings raised to SIG{'__WARN__'}. This is done using + Test::Builder so we are Test::More compliant. + Returntype : None + Exceptions : none + Caller : test scripts + +=cut + +sub warns_like (&$;$) { + my ($callback, $regex, $msg) = @_; + my $warnings; + local $SIG{'__WARN__'} = sub { + $warnings .= $_[0]; + }; + $callback->(); + return __PACKAGE__->builder()->like($warnings, $regex, $msg); +} + +=head2 ok_directory_contents + + Arg [1] : String directory to search for files in + Arg [2] : ArrayRef filenames to look for + Arg [3] : String message to print + Example : ok_directory_contents('/etc', 'hosts', '/etc/hosts is there'); + Description: + Returntype : Boolean declares if the test was a success + Exceptions : none + Caller : test scripts + +=cut + +sub ok_directory_contents ($$;$) { + my ($dir, $files, $msg) = @_; + my $result; + my @missing; + foreach my $file (@{$files}) { + my $full_path = File::Spec->catfile($dir, $file); + if(! -e $full_path || ! -s $full_path) { + push(@missing, $file); + } + } + my $builder = __PACKAGE__->builder(); + if(@missing) { + $result = $builder->ok(0, $msg); + $builder->diag("Directory '$dir' is missing the following files"); + my $missing_msg = join(q{, }, @missing); + $builder->diag(sprintf('[%s]', $missing_msg)); + } + else { + $result = $builder->ok(1, $msg); + } + return $result; +} + +=head2 is_file_line_count + + Arg [1] : String file to test. Can be a gzipped file or uncompressed + Arg [2] : Integer the number of expected rows + Arg [3] : String optional message to print to screen + Example : is_file_line_count('/etc/hosts', 10, 'We have 10 entries in /etc/hosts'); + Description: Opens the given file (can be gzipped or not) and counts the number of + lines by simple line iteration + Returntype : Boolean Declares if the test succeeeded or not + Exceptions : none + Caller : test scripts + +=cut + +sub is_file_line_count ($$;$;$) { + my ($file, $expected_count, $msg, $pattern) = @_; + my $builder = __PACKAGE__->builder(); + if(! -e $file) { + my $r = $builder->ok(0, $msg); + $builder->diag("$file does not exist"); + return $r; + } + + my $count = 0; + my $sub_counter = sub { + my ($fh) = @_; + while(my $line = <$fh>) { + if ($pattern && $line !~ /$pattern/) { next; } + $count++; + } + return; + }; + + if($file =~ /.gz$/) { + gz_work_with_file($file, 'r', $sub_counter); + } + else { + work_with_file($file, 'r', $sub_counter); + } + + return $builder->cmp_ok($count, '==', $expected_count, $msg); +} + +=head2 mock_object + + Arg [1] : Object used to mock + Arg [2] : Boolean 1-dump variables + Example : my $mock = mock_object($obj); $mock->hello(); is($mock->_called('hello'), 1); + Description: Returns a mock object which counts the number of times a method + is invoked on itself. This is very useful to use when we want + to make sure certain methods are & are not called. + Returntype : Bio::EnsEMBL::Test::TestUtils::MockObject + Exceptions : none + Caller : test scripts + +=cut + +sub mock_object { + my ($obj) = @_; + return Bio::EnsEMBL::Test::TestUtils::MockObject->new($obj); +} + +=head2 all_has_apache2_licence + + Arg [n] : Directories to scan. Defaults to blib, t, modules, lib and sql + should they exist (remember relative locations matter if you give them) + Example : my @files = all_has_apache2_licence(); + my @files = all_has_apache2_licence('../lib/t'); + Description: Scans the given directories and returns all found instances of + source code. This includes Perl (pl,pm,t), Java(java), C(c,h) and + SQL (sql) suffixed files. It then looks for the Apache licence 2.0 + declaration in the top of the file (30 lines leway given). + + Should you not need it to scan a directory then put a no critic + declaration at the top. This will prevent the code from scanning and + mis-marking the file. The scanner directive is (American spelling also supported) + no critic (RequireApache2Licence) + Returntype : Boolean indicating if all given directories has source code + with the expected licence + +=cut + +sub all_has_apache2_licence { + my @files = all_source_code(@_); + my $ok = 1; + foreach my $file (@files) { + $ok = 0 if ! has_apache2_licence($file); + } + return $ok; +} + +=head2 has_apache2_licence + + Arg [1] : File path to the file to test + Example : has_apache2_licence('/my/file.pm'); + Description: Asserts if we can find the short version of the Apache v2.0 + licence within the first 30 lines of the given file. You can + skip the test with a C<no critic (RequireApache2Licence)> tag. We + also support the American spelling of this. + Returntype : None + Exceptions : None + +=cut + +sub has_apache2_licence { + my ($file) = @_; + my $count = 0; + my $max_lines = 30; + my ($found_copyright, $found_url, $found_warranties, $skip_test) = (0,0,0,0); + open my $fh, '<', $file or die "Cannot open $file: $!"; + while(my $line = <$fh>) { + last if $count >= $max_lines; + if($line =~ /no critic \(RequireApache2Licen(c|s)e\)/) { + $skip_test = 1; + last; + } + $found_copyright = 1 if $line =~ /Apache License, Version 2\.0/; + $found_url = 1 if $line =~ /www.apache.org.+LICENSE-2.0/; + $found_warranties = 1 if $line =~ /WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND/; + $count++; + } + close $fh; + if($skip_test) { + return __PACKAGE__->builder->ok(1, "$file has a no critic (RequireApache2Licence) directive"); + } + if($found_copyright && $found_url && $found_warranties) { + return __PACKAGE__->builder->ok(1, "$file has a Apache v2.0 licence declaration"); + } + __PACKAGE__->builder->diag("$file is missing Apache v2.0 declaration"); + __PACKAGE__->builder->diag("$file is missing Apache URL"); + __PACKAGE__->builder->diag("$file is missing Apache v2.0 warranties"); + return __PACKAGE__->builder->ok(0, "$file does not have an Apache v2.0 licence declaration in the first $max_lines lines"); +} + +=head2 all_source_code + + Arg [n] : Directories to scan. Defaults to blib, t, modules, lib and sql + should they exist (remember relative locations matter if you give them) + Example : my @files = all_source_code(); + my @files = all_source_code('lib/t'); + Description: Scans the given directories and returns all found instances of + source code. This includes Perl (pl,pm,t), Java(java), C(c,h) and + SQL (sql) suffixed files. + Returntype : Array of all found files + +=cut + +sub all_source_code { + my @starting_dirs = @_ ? @_ : _starting_dirs(); + my %starting_dir_lookup = map {$_,1} @starting_dirs; + my @files; + my @dirs = @starting_dirs; + my @modules; + while ( my $file = shift @dirs ) { + if ( -d $file ) { + opendir my $dir, $file or next; + my @new_files = + grep { $_ ne 'CVS' && $_ ne '.svn' && $_ ne '.git' && $_ !~ /^\./ } + File::Spec->no_upwards(readdir $dir); + closedir $dir; + push(@dirs, map {File::Spec->catpath($file, $_)} @new_files); + } + if ( -f $file ) { + next unless $file =~ /(?-xism:\.(?:[cht]|p[lm]|java|sql))/; + push(@files, $file); + } + } # while + return @files; +} + +sub _starting_dirs { + my @dirs; + push(@dirs, grep { -e $_ } qw/blib lib sql t modules/); + return @dirs; +} + +=head2 find_circular_refs + + Arg [1] : Boolean 1-print cycles + Arg [2] : Boolean 1-dump variables + Example : my $count = find_circular_refs(1,1); + Description: Returns the number of variables with circular references. + Only variables which are ensembl objects are considered. + The sub will go through variables which are in scope at the point it was called. + Returntype : int + Exceptions : none + Caller : test scripts + +=cut + +my %ensembl_objects = (); +my $cycle_found; +my $print_cycles; + +sub find_circular_refs { + + $print_cycles = shift; + my $dump_vars = shift; + my $message; + my $lexical = peek_my(1); + + while (my ($var, $ref) = each %$lexical) { + my $dref = $ref; + while (ref($dref) eq "REF") { + $dref = $$dref; + } + if ( ref($dref) =~ /Bio\:\:EnsEMBL/ and !defined($ensembl_objects{$var.ref($dref)}) ) { + $ensembl_objects{$var.ref($dref)} = 0; + $message = $var ." ". ref($dref); + _get_cycles($var,$dref,$message, $dump_vars); + } + if (ref($dref) eq "HASH") { + my %dref_hash = %$dref; + my $value_count = 0; + foreach my $key (keys %dref_hash) { + $value_count ++; + if (ref($dref_hash{$key}) =~ /Bio\:\:EnsEMBL/ and !defined($ensembl_objects{$var.$value_count.ref($dref_hash{$key})} ) ) { + $ensembl_objects{$var.$value_count.ref($dref_hash{$key})} = 0; + $message = $var . " HASH value ".$value_count." ". ref($dref_hash{$key}); + _get_cycles($var,$dref_hash{$key},$message,$dump_vars,$key); + } + } + } + if (ref($dref) eq "ARRAY") { + #for an array check the first element only + my @dref_array = @$dref; + + if (ref($dref_array[0]) =~ /Bio\:\:EnsEMBL/ and !defined($ensembl_objects{$var."0".ref($dref_array[0])}) ) { + $ensembl_objects{$var."0".ref($dref_array[0])} = 0; + $message = $var ." ARRAY element 0 ". ref($dref_array[0]); + _get_cycles($var,$dref_array[0],$message,$dump_vars,undef,0); + } + + } + + } + my $circular_count = 0; + foreach my $value (values %ensembl_objects) { + $circular_count += $value; + } + return $circular_count; +} + +sub _get_cycles { + + my $var = shift; + my $dref = shift; + my $message = shift; + my $dump_vars = shift; + my $hash_key = shift; + my $array_element = shift; + + $cycle_found = 0; + if ($print_cycles) { + find_cycle($dref); + find_cycle($dref, \&_count_cycles); + } + else { + #use try/catch to return after 1st cycle is found if we're not printing cycles + try { + find_cycle($dref, \&_count_cycles); + } + catch Error::Simple with { + + }; + } + + if ($cycle_found) { + + my $key = ""; + if ($hash_key) { + $key = $var.$hash_key; + } + elsif (defined $array_element) { + $key = $var.$array_element; + } + $ensembl_objects{$key.ref($dref)} += 1; + print "circular reference found in ".$message."\n"; + if ($dump_vars) { + Dump($dref); + } + } +} + +sub _count_cycles { + if (!$print_cycles && $cycle_found) { + throw Error::Simple; + } + my $cycle_array_ref = shift; + my @cycle_array = @$cycle_array_ref; + if (scalar(@cycle_array) > 0) { + $cycle_found = 1; + } +} + +#See mock_object() for more information about how to use +package Bio::EnsEMBL::Test::TestUtils::MockObject; + +use base qw/Bio::EnsEMBL::Utils::Proxy/; + +sub __clear { + my ($self) = @_; + $self->{__counts} = undef; +} + +sub __called { + my ($self, $method) = @_; + return $self->{__counts}->{$method} if exists $self->{__counts}->{$method}; + return 0; +} + +sub __is_called { + my ($self, $method, $times, $msg) = @_; + my $calls = $self->__called($method); + return Bio::EnsEMBL::Test::TestUtils->builder()->is_num($calls, $times, $msg); +} + +sub __resolver { + my ($invoker, $package, $method) = @_; + return sub { + my ($self, @args) = @_; + my $wantarray = wantarray(); + $self->{__counts}->{$method} = 0 unless $self->{__counts}->{$method}; + my @capture = $self->__proxy()->$method(@args); + $self->{__counts}->{$method}++; + return @capture if $wantarray; + return shift @capture; + }; +} + +1; diff --git a/scripts/MultiTestDB.conf.example b/scripts/MultiTestDB.conf.example new file mode 100644 index 0000000..309cf04 --- /dev/null +++ b/scripts/MultiTestDB.conf.example @@ -0,0 +1,26 @@ +{ + 'port' => '3306', + 'driver' => 'mysql', + 'user' => 'ensadmin', + 'pass' => 'XXX', + 'host' => 'ens-research', + 'zip' => 'test-genome-DBs.zip', + + # add a line with the dbname and module + 'databases' => { + 'multi' => + { 'compara' => 'Bio::EnsEMBL::Compara::DBSQL::DBAdaptor' }, + 'homo_sapiens' => { 'core' => 'Bio::EnsEMBL::DBSQL::DBAdaptor' }, + 'mus_musculus' => { 'core' => 'Bio::EnsEMBL::DBSQL::DBAdaptor' }, + 'rattus_norvegicus' => + { 'core' => 'Bio::EnsEMBL::DBSQL::DBAdaptor' } + }, + + # uncomment to use preloaded databases (useful when doing lots of + # testing) + # 'preloaded' => { + # 'multi' => { 'compara' => 'ensembl_compara_test' }, + # 'homo_sapiens' => { 'core' => 'homo_sapiens_core_test' }, + # 'mus_musculus' => { 'core' => 'mus_musculus_core_test' }, + # 'rattus_norvegicus' => { 'core' => 'rattus_norvegicus_core_test' } } +} diff --git a/scripts/README b/scripts/README new file mode 100755 index 0000000..6b99d28 --- /dev/null +++ b/scripts/README @@ -0,0 +1,66 @@ + This directory contains a script "runtests.pl" that is used to run a + set of test (*.t extension files) present in the directory given as + argument (see example below) + + Example of set of tests currently in use can be found e.g. in + ensembl/modules/t or ensembl-compara/modules/t + + In order to run the tests, you MUST have a MultiTestDB.conf file in + the directory where the set of tests to be run are present. You + have an MultiTestDB.conf.example in the current directory that can + be copied to the right place and updated at your convinience. The + file gives the information (username, host, etc.) for a MySQL + instance where you have write permission. Running test case(s) + which require a database will automatically create a temporary + database during the test(s) execution. In case you want to remove + the temporary database after all tests, use the -c command line + switch. + + To use the tests you must add the ensembl-test modules to your + PERL5LIB environment variable. + + Example (for tcsh or csh): + + setenv PERL5LIB ${PERL5LIB}:${ENSHOME}/ensembl-test/modules + + Example (for ksh or bash): + + export PERL5LIB=$PERL5LIB:${ENSHOME}/ensembl-test/modules + + To run multiple tests use the runtests.pl script in + ensembl-test/scripts + + Examples: + + # Run all tests in the t directory + runtests.pl t + + # Run 3 tests + runtests.pl t/gene.t t/exon.t t/densityFeature.t + + # Run a single test + runtests.pl t/gene.t + + # Run all tests in the current directory + runtests.pl + + # Run all tests in the current directory and clean up + runtests.pl -c + + The ensembl-test module use standard perl libraries from which you + can get some information + + perldoc Test + perldoc Test::Harness + + Have also a look at + + perldoc Bio::EnsEMBL::Test::MultiTestDB + perldoc Bio::EnsEMBL::Test::TestUtils + + There are also extra information more specific to a particular git + repository in e.g. + + https://github.com/Ensembl/ensembl/blob/master/modules/t/README + https://github.com/Ensembl/ensembl-compara/blob/master/modules/t/README + diff --git a/scripts/README.dump_test_schema b/scripts/README.dump_test_schema new file mode 100644 index 0000000..e87befb --- /dev/null +++ b/scripts/README.dump_test_schema @@ -0,0 +1,11 @@ +dump_test_schema.pl requires the following modules, which cpanm should +be capable of installing: + + MooseX::App::Simple + DBIx::Class::Schema::Loader + SQL::Translator + +These have more than a few dependencies! + +DBD::mysql is also required but if you're using the EnsEMBL API you +probably have that already. diff --git a/scripts/cleanup_databases.pl b/scripts/cleanup_databases.pl new file mode 100755 index 0000000..98df388 --- /dev/null +++ b/scripts/cleanup_databases.pl @@ -0,0 +1,87 @@ +#!/usr/bin/env perl +# Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +use strict; +use warnings; + +use Bio::EnsEMBL::Test::MultiTestDB; +use Getopt::Long; +use Pod::Usage; + +sub run { + my ($class) = @_; + $ENV{RUNTESTS_HARNESS} = 0; + my $self = bless({}, 'main'); + $self->args(); + my $config = Bio::EnsEMBL::Test::MultiTestDB->get_db_conf($self->{opts}->{curr_dir}); + foreach my $species (sort keys %{$config->{databases}}) { + my $multi = Bio::EnsEMBL::Test::MultiTestDB->new($species, $self->{opts}->{curr_dir}); + undef $multi; + } + return; +} + +sub args { + my ($self) = @_; + my $opts = {}; + GetOptions( + $opts, qw/ + curr_dir=s + help + man + / + ) or pod2usage(-verbose => 1, -exitval => 1); + pod2usage(-verbose => 1, -exitval => 0) if $opts->{help}; + pod2usage(-verbose => 2, -exitval => 0) if $opts->{man}; + + pod2usage(-verbose => 2, -exitval => 2, -msg => "No --curr_dir option given") if ! $opts->{curr_dir}; + pod2usage(-verbose => 2, -exitval => 2, -msg => "--curr_dir is not a directory") if ! -d $opts->{curr_dir}; + my $config = File::Spec->catfile($opts->{curr_dir}, 'MultiTestDB.conf'); + pod2usage(-verbose => 2, -exitval => 2, -msg => "Cannot find a MultiTestDB.conf at '${config}'. Check your --curr_dir command line option") if ! -f $config; + + $self->{opts} = $opts; + return; +} + +run(); + +1; +__END__ + +=head1 NAME + + cleanup_databases.pl + +=head1 SYNOPSIS + + ./cleanup_databases.pl --curr_dir ensembl/modules/t + +=head1 DESCRIPTION + +Loads any available frozen files in the given directory, loads those schemas and attempts +to run cleanup of the databases + +=head1 OPTIONS + +=over 8 + +=item B<--curr_dir> + +Current directory. Should be set to the directory which has your configuration files + +=back + +=cut \ No newline at end of file diff --git a/scripts/clone_core_database.pl b/scripts/clone_core_database.pl new file mode 100755 index 0000000..bca095e --- /dev/null +++ b/scripts/clone_core_database.pl @@ -0,0 +1,552 @@ +#!/usr/bin/env perl +# Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +use strict; +use warnings; + +use Bio::EnsEMBL::Registry; +use Bio::EnsEMBL::DBSQL::DBConnection; +use Bio::EnsEMBL::DBSQL::DBAdaptor; +use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor; +use Bio::EnsEMBL::Test::DumpDatabase; +use Bio::EnsEMBL::Utils::IO qw/slurp/; +use Bio::EnsEMBL::Utils::Scalar qw/scope_guard/; +use File::Temp qw/tempfile/; +use Getopt::Long qw/:config no_ignore_case/; +use JSON; +use Pod::Usage; +use POSIX; +use Scalar::Util qw/looks_like_number/; + +my %global_tables = ( + core => [qw/attrib_type meta coord_system external_db misc_attrib unmapped_reason/], + funcgen => [qw/feature_set/], +); + +run(); + +sub run { + my $self = bless({}, __PACKAGE__); + $self->parse_options(); + $self->load_registry(); + $self->load_json(); + $self->process(); +} + +sub parse_options { + my ($self) = @_; + my $opts = { + port => 3306, + user => 'ensro', + dest_port => 3306 + }; + + GetOptions($opts, qw/ + host|hostname|h=s + port|P=i + user|username|u=s + pass|password|p=s + dbname|database|db=s + species=s + + dest_host|dest_hostname|dh=s + dest_port|dP=i + dest_user|dest_username|du=s + dest_pass|dest_password|dp=s + + registry|reg_conf=s + + json=s + + directory=s + drop_database + + help + man + /) or pod2usage(-msg => 'Misconfigured options given', -verbose => 1, -exitval => 1); + pod2usage(-verbose => 1, -exitval => 0) if $opts->{help}; + pod2usage(-verbose => 2, -exitval => 0) if $opts->{man}; + return $self->{opts} = $opts; +} + +sub load_registry { + my ($self) = @_; + my $opts = $self->{opts}; + if($opts->{registry}) { + print STDERR "Loading from registry\n"; + Bio::EnsEMBL::Registry->load_all($opts->{registry}); + } + elsif($opts->{host} && $opts->{port} && $opts->{user} && $opts->{dbname}) { + my %args = ( + -HOST => $opts->{host}, -PORT => $opts->{port}, + -USER => $opts->{user}, -DBNAME => $opts->{dbname}, + -SPECIES => $opts->{species} + ); + $args{-PASS} = $opts->{pass}; + Bio::EnsEMBL::DBSQL::DBAdaptor->new(%args); + } + else { + pod2usage(-msg => 'Misconfigured source database. Please give a -registry file or -host, -port, -user, -dbname and -species', -verbose => 1, -exitval => 1); + } + return; +} + +sub target_dbc { + my ($self) = @_; + my $opts = $self->{opts}; + if(!$opts->{dest_host} && !$opts->{dest_user}) { + pod2usage(-msg => 'Misconfigured target database. Please give a -dest_host, -dest_port, -dest_user ', -verbose => 1, -exitval => 1); + } + my %args = ( + -HOST => $opts->{dest_host}, -PORT => $opts->{dest_port}, + -USER => $opts->{dest_user} + ); + $args{-PASS} = $opts->{dest_pass} if $opts->{dest_pass}; + return $self->{dbc} = Bio::EnsEMBL::DBSQL::DBConnection->new(%args); +} + +sub load_json { + my ($self) = @_; + my $json_location = $self->{opts}->{json}; + pod2usage(-msg => 'No -json configuration given', -verbose => 1, -exitval => 1) unless $json_location; + pod2usage(-msg => "JSON location $json_location does not exist", -verbose => 1, -exitval => 1) unless -f $json_location; + my $slurp = slurp($json_location); + my $json = JSON->new()->relaxed(1); + return $self->{json} = $json->decode($slurp); +} + +sub process { + my ($self) = @_; + my $dbc = $self->target_dbc(); + my $config_hash = $self->{json}; + my $is_dna = 1; + + foreach my $species (keys %{$config_hash}) { + foreach my $group (keys %{$config_hash->{$species}}) { + $is_dna = 0 if $group eq 'funcgen'; + my $registry = 'Bio::EnsEMBL::Registry'; + my $from = $registry->get_DBAdaptor($species, $group); + my $info = $config_hash->{$species}->{$group}; + my $regions = $info->{regions}; + my $adaptors = $info->{adaptors}; + my $to = $self->copy_database_structure($species, $group, $dbc); + $self->copy_globals($from, $to); + my $slices = $self->copy_regions($from, $to, $regions, $is_dna); + my $filter_exceptions = $info->{filter_exceptions}; + foreach my $adaptor_info (@{$adaptors}) { + $self->copy_features($from, $to, $slices, $adaptor_info, $filter_exceptions); + } + $self->dump_database($to); + $self->drop_database($to); + } + } +} + +sub dump_database { + my ($self, $dba) = @_; + my $dir = $self->{opts}->{directory}; + if($dir) { + print STDERR "Directory given; will dump database to this location\n"; + my $dumper = Bio::EnsEMBL::Test::DumpDatabase->new($dba, $dir); + $dumper->dump(); + } + return; +} + +sub drop_database { + my ($self, $dba) = @_; + if($self->{opts}->{drop_database}) { + print STDERR "Dropping the database\n"; + my $dbc = $dba->dbc(); + my $db = $dbc->dbname; + $dbc->do('drop database '.$db); + delete $dbc->{dbname}; + $dbc->disconnect_if_idle(); + } + return; +} + + +sub copy_globals { + my ($self, $from, $to) = @_; + my $schema = $from->get_MetaContainer()->single_value_by_key('schema_type'); + $schema ||= $from->group(); + my $tables = $global_tables{$schema}; + $self->copy_all_data($from, $to, $_) for @{$tables}; + return; +} + +# Starts the copy across of Slices +sub copy_regions { + my ($self, $from, $to, $regions, $is_dna) = @_; + my $coord_sql = "select name, coord_system_id from coord_system"; + my $coord_systems = $to->dbc->sql_helper()->execute_into_hash(-SQL => $coord_sql); + + my $slice_adaptor = $from->get_adaptor("Slice"); + my $seq_region_names; + + # Grab all toplevel slices and record those IDs which need to be + # transferred for the + my @toplevel_slices; + my %seq_region_id_list; + foreach my $region (@{$regions}) { + my ($name, $start, $end, $coord_system, $version) = @{$region}; + my $strand = undef; + $coord_system ||= 'toplevel'; + #Make the assumption that the core API is OK and that the 3 levels of assembly are chromosome, supercontig and contig + #Also only get those slices which are unique + my $slice = $slice_adaptor->fetch_by_region($coord_system, $name, $start, $end, $strand, $version); + if(! $slice) { + print STDERR "Could not find a slice for $name .. $start .. $end\n"; + next; + } + push(@toplevel_slices, $slice); + my $supercontigs; + + #May not always have supercontigs + if ( $coord_systems->{'supercontig'} ) { + $supercontigs = $slice->project('supercontig'); + foreach my $supercontig (@$supercontigs) { + my $supercontig_slice = $supercontig->[2]; + $seq_region_id_list{$supercontig_slice->get_seq_region_id} = 1; + } + } + + #Assume always have contigs + my $contigs = $slice->project('contig'); + foreach my $contig (@$contigs) { + my $contig_slice = $contig->[2]; + $seq_region_id_list{$contig_slice->get_seq_region_id} = 1; + } + + } + + #Copy the information about each contig/supercontig's assembly + my $seq_region_ids = join(q{,}, keys %seq_region_id_list); + if ($is_dna) { + my $sr_query = "SELECT a.* FROM seq_region s JOIN assembly a ON (s.seq_region_id = a.cmp_seq_region_id) WHERE seq_region_id IN ($seq_region_ids)"; + $self->copy_data($from, $to, "assembly", $sr_query); + } + + + # Once we've got the original list of slices we have to know if one is an + # assembly what it maps to & bring that seq_region along (toplevel def). If + # seq is wanted then user has to specify that region + my @seq_region_exception_ids; + foreach my $slice (@toplevel_slices) { + next if $slice->is_reference(); + my $exception_features = $slice->get_all_AssemblyExceptionFeatures(); + foreach my $exception (@{$exception_features}) { + push(@seq_region_exception_ids, $slice_adaptor->get_seq_region_id($exception->slice())); + push(@seq_region_exception_ids, $slice_adaptor->get_seq_region_id($exception->alternate_slice())); + } + } + + #Grab the copied IDs from the target DB & use this to drive the copy of assembly exceptions + my $asm_cmp_ids = join(q{,}, @seq_region_exception_ids); + if (scalar(@seq_region_exception_ids) > 0) { + $self->copy_data($from, $to, 'assembly_exception', "SELECT * FROM assembly_exception WHERE seq_region_id in ($asm_cmp_ids)"); + } + + #Now transfer all seq_regions from seq_region into the new DB + my @seq_regions_to_copy = (@seq_region_exception_ids, (map { $slice_adaptor->get_seq_region_id($_) } @toplevel_slices), keys %seq_region_id_list); + my $seq_regions_to_copy_in = join(q{,}, @seq_regions_to_copy); + $self->copy_data($from, $to, 'seq_region', "SELECT * FROM seq_region WHERE seq_region_id in ($seq_regions_to_copy_in)"); + $self->copy_data($from, $to, 'seq_region_attrib', "SELECT * FROM seq_region_attrib WHERE seq_region_id in ($seq_regions_to_copy_in)") if $is_dna; + $self->copy_data($from, $to, 'dna', "SELECT * FROM dna WHERE seq_region_id in ($seq_regions_to_copy_in)") if $is_dna; + + return \@toplevel_slices; +} + +sub copy_features { + my ($self, $from, $to, $slices, $adaptor_info) = @_; + my $name = $adaptor_info->{name}; + my $suppress_warnings = $adaptor_info->{suppress_warnings}; + my $sig_warn; + my $sig_warn_guard; + if($suppress_warnings) { + $sig_warn = $SIG{__WARN__}; + $sig_warn_guard = scope_guard(sub { $SIG{__WARN__} = $sig_warn }); + $SIG{__WARN__} = sub {}; #ignore everything + } + print STDERR "Copying $name features\n"; + my $from_adaptor = $from->get_adaptor($name); + my $to_adaptor = $to->get_adaptor($name); + my $method = $adaptor_info->{method} || 'fetch_all_by_Slice'; + my $args = $adaptor_info->{args} || []; + foreach my $slice (@{$slices}) { + my $features = $from_adaptor->$method($slice, @{$args}); + my $total_features = scalar(@{$features}); + my $count = 0; + foreach my $f (@{$features}) { + if($f->can('stable_id')) { + print STDERR sprintf('Processing %s'."\n", $f->stable_id()); + } + else { + if($count != 0 && ($count % 100 == 0)) { + print STDERR sprintf('Processing %d out of %d'."\n", $count, $total_features); + } + } + + $f = $self->post_process_feature($f, $slice); + next unless $f; # means we decided not to store it + $to_adaptor->store($f); + $count++; + } + } + return; +} + +sub copy_database_structure { + my ($self, $species, $group, $target_dbc) = @_; + my $dba = Bio::EnsEMBL::Registry->get_DBAdaptor($species, $group); + my $dbc = $dba->dbc(); + my $target_name = $self->new_dbname($dba->dbc()->dbname()); + my $source_name = $dba->dbc->dbname(); + print STDERR "Copying schema from ${source_name} into '${target_name}'\n"; + $target_dbc->do('drop database if exists '.$target_name); + $target_dbc->do('create database '.$target_name); + my $cmd_tmpl = 'mysqldump --host=%s --port=%d --user=%s --no-data --skip-add-locks --skip-lock-tables %s | mysql --host=%s --port=%d --user=%s --password=%s %s'; + my @src_args = map { $dbc->$_() } qw/host port username dbname/; + my @trg_args = ((map { $target_dbc->$_() } qw/host port username password/), $target_name); + my $cmd = sprintf($cmd_tmpl, @src_args, @trg_args); + system($cmd); + my $rc = $? >> 8; + if($rc != 0 ) { + die "Could not execute command '$cmd'; got return code of $rc"; + } + $target_dbc->dbname($target_name); + $target_dbc->do('use '.$target_name); + print STDERR "Finished population\n"; + my $dbadaptor; + if ($group eq 'funcgen') { + $dbadaptor = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new( + -DBCONN => $target_dbc, + -GROUP => $group, + -SPECIES => $target_name, + -DNADB => $dba->dnadb(), + ); + } else { + $dbadaptor = Bio::EnsEMBL::DBSQL::DBAdaptor->new( + -DBCONN => $target_dbc, + -GROUP => $group, + -SPECIES => $target_name, + ); + } + return $dbadaptor; +} + +sub get_ids { + my ($self, $dba, $id, $table ) = @_; + my $sql = "SELECT distinct($id) FROM $table"; + my $ids = $dba->dbc->sql_helper->execute_simple( -SQL => $sql ); + return $ids; +} + +sub copy_all_data { + my ($self, $from, $to, $table) = @_; + my $query = "select * from $table"; + return $self->copy_data($from, $to, $table, $query); +} + +sub copy_data { + my ($self, $from, $to, $table, $query) = @_; + print STDERR "Copying to $table\n\tQuery : '${query}'\n"; + my ($fh, $filename) = tempfile(); + $from->dbc->sql_helper()->execute_no_return( + -SQL => $query, + -CALLBACK => sub { + my ($row) = @_; + my @copy; + foreach my $e (@{$row}) { + if(!defined $e) { + $e = '\N'; + } + elsif(!looks_like_number($e)) { + $e =~ s/\n/\\\n/g; + $e =~ s/\t/\\\t/g; + } + push(@copy, $e); + } + my $line = join(qq{\t}, @copy); + print $fh $line, "\n"; + } + ); + close $fh; + my $target_load_sql = "LOAD DATA LOCAL INFILE '$filename' INTO TABLE $table"; + return $to->dbc->do($target_load_sql); +} + +sub new_dbname { + my ($self, $dbname) = @_; + my @localtime = localtime(); + my $date = strftime '%Y%m%d', @localtime; + my $time = strftime '%H%M%S', @localtime; + return sprintf('%s_%s_%s_%s',$ENV{'USER'}, $date, $time, $dbname); +} + +sub post_process_feature { + my ($self, $f, $slice, $filter_exception) = @_; + my $filter = $self->filter_on_exception($f, $slice, $filter_exception); + return if $filter; + + #Core objects + if($f->can('load')) { + $f->load(); + } + elsif($f->isa('Bio::EnsEMBL::RepeatFeature')) { + $self->_load_repeat($f); + } + + + return $f; +} + +sub filter_on_exception { + my ($self, $f, $slice) = @_; + if($f->start() < 1) { + return 1; + } + if($f->start() > $slice->end()) { + return 1; + } + return 0; +} + +sub _load_repeat { + my ($self, $f) = @_; + delete $f->repeat_consensus()->{dbID}; + delete $f->repeat_consensus()->{adaptor}; + return; +} + +__END__ + +=head1 NAME + + clone_core_database.pl + +=head1 SYNOPSIS + + clone_core_database.pl -host HOST [-port PORT] -user USER [-pass PASS] -dbname DBNAME \ + [-registry REG] \ + -species SPECIES \ + -dest_host HOST -dest_port PORT -dest_user USER -dest_pass PASS \ + -json JSON \ + -directory DIR \ + [-drop_database] + +=head1 DESCRIPTION + +This script will take a JSON file of regions and adaptor calls and translates +this into a dump of a core database of controlled content. This gives +you as realistic a core database as we can provide perfect for testing. + +=head1 PARAMETERS + +=over 8 + +=item B<--host | --hostname | -h> + +Host of the server to use as a source. Not required if you are using a registry file + +=item B<--port | --P> + +Port of the server to use as a source. Not required if you are using a registry file + +=item B<--user | --username | -u> + +Username of the server to use as a source. Not required if you are using a registry file + +=item B<--pass | --password | -p> + +Password of the server to use as a source. Not required if you are using a registry file + +=item B<--dbname | --database | --db> + +Database name of the server to use as a source. Not required if you are using a registry file + +=item B<--species> + +Species name to use. Not required if you are using a registry file + +=item B<--registry | --reg_conf> + +Registry file to load data from + +=item B<--dest_host | --dest_hostname | --dh> + +Target host for the database. Required parameter + +=item B<--dest_port | --dP> + +Target port for the database. Required parameter + +=item B<--dest_user | --dest_username | --du> + +Target user for the database. Required parameter + +=item B<--dest_pass | --dest_password | --dp> + +Target password for the database. + +=item B<--json> + +JSON configuration file which informs this script of the regions of data +to grab, from which species/group and what adaptors should be called to +fetch data for. If just a name is given to the adaptor array we assume +a call to C<fetch_all_by_Slice()> is wanted. Otherwise we will use the +method and the given arguments and store that data. + +An example configuration is given below. JSON is in relaxed mode so +inline shell comments (#) and trailing commas are allowed. + + { + "human" : { + "core" : { + "regions" : [ + ["6", 1000000, 2000000], + ["X", 1, 3000000], + ["Y", 1, 100000], + ["Y", 2649521, 4000000] + ], + "adaptors" : [ + { "name" : "gene", "method" : "fetch_all_by_Slice", "args" : [] }, + { "name" : "repeatfeature" } + ] + } + } + } + +=item B<--directory> + +The directory to dump the data into. You will get 1 TXT file per table and +1 SQL file for the entire schema. + +=item B<--drop_database> + +Indicates if you wish to drop the database from the server post flat file +generation. If not you will have to manually drop the database. + +=item B<--help> + +Print help messages + +=item B<--man> + +Print the man page for this script + +=back diff --git a/scripts/convert_test_schemas.sh b/scripts/convert_test_schemas.sh new file mode 100755 index 0000000..7972be4 --- /dev/null +++ b/scripts/convert_test_schemas.sh @@ -0,0 +1,62 @@ +#!/bin/sh + +# Copyright [1999-2013] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +script_dir=$( cd $( dirname $0 ); echo $PWD ) +test_dir=$1 + +if [ ! -d "${test_dir}" ]; then + echo "Cannot find: ${test_dir}" + exit 1; +fi + +dumped_schema='Bio-EnsEMBL-Test-Schema-0.1-SQLite.sql' +dest_schema='table.sql' + +convert_schema() { + local species db_type + species="$1" + db_type="$2" + + schema_dir="${test_dir}/test-genome-DBs/${species}/${db_type}" + if [ ! -d "${schema_dir}" ]; then + echo "Cannot find: ${schema_dir}" + exit 1; + fi + + echo "Dumping '$species' - '$db_type'" + "${script_dir}/dump_test_schema.pl" --species "${species}" --db_type "${db_type}" --test_dir "${test_dir}" + + dest_dir="${schema_dir}/SQLite" + mkdir -v -p "${dest_dir}" + mv -v "${dumped_schema}" "${dest_dir}/${dest_schema}" + echo +} + +( + cd "${test_dir}/test-genome-DBs" + for species in *; do + ( + cd "${species}" + for db_type in *; do + convert_schema "${species}" "${db_type}" + done + ) + done +) + +exit 0 + +# EOF diff --git a/scripts/dump_test_schema.pl b/scripts/dump_test_schema.pl new file mode 100755 index 0000000..438dc40 --- /dev/null +++ b/scripts/dump_test_schema.pl @@ -0,0 +1,198 @@ +#!/usr/bin/env perl + +# Copyright [1999-2013] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package Bio::EnsEMBL::App::DumpTestSchema; + +use 5.010; + +use MooseX::App::Simple qw(Color); + +use File::Slurp; +use File::Spec; + +use Bio::EnsEMBL::Test::MultiTestDB; +use DBIx::Class::Schema::Loader qw(make_schema_at); + +option 'test_dir' => ( + is => 'ro', + isa => 'Str', + default => sub { $ENV{PWD} }, + cmd_aliases => [qw/test-dir testdir/], + documentation => q[Directory containing MultiTestDB.conf], + ); + +option 'species' => ( + is => 'ro', + isa => 'Str', + default => 'homo_sapiens', + documentation => q[Species], + ); + +option 'db_type' => ( + is => 'ro', + isa => 'Str', + default => 'core', + cmd_aliases => [qw/db-type dbtype/], + documentation => q[Database type], + ); + +option 'dump_schema' => ( + is => 'ro', + isa => 'Bool', + cmd_aliases => [qw/dump-schema dumpschema/], + documentation => q[Dump DBIC schema], + ); + +option 'schema_class' => ( + is => 'ro', + isa => 'Str', + default => 'Bio::EnsEMBL::Test::Schema', + cmd_aliases => [qw/schema-class schemaclass/], + documentation => q[Generated schema class], + ); + +option 'schema_dir' => ( + is => 'ro', + isa => 'Str', + default => sub { $ENV{PWD} }, + cmd_aliases => [qw/schema-dir schemadir/], + documentation => q[Directory for schema class dump], + ); + +option 'ddl_dir' => ( + is => 'ro', + isa => 'Str', + default => sub { $ENV{PWD} }, + cmd_aliases => [qw/ddl-dir ddldir/], + documentation => q[Directory for ddl output], + ); + +option 'version' => ( + is => 'ro', + isa => 'Str', + default => '0.1', + documentation => q[Generated schema version], + ); + +option 'check_driver' => ( + is => 'ro', + isa => 'Str', + default => 'mysql', + cmd_aliases => [qw/check-driver checkdriver/], + documentation => q[Expected source DBD driver type], + ); + +option 'dump_driver' => ( + is => 'ro', + isa => 'Str', + default => 'SQLite', + cmd_aliases => [qw/dump-driver dumpdriver/], + documentation => q[Destination DBD driver type], + ); + +has 'dbc' => ( + is => 'rw', + isa => 'Bio::EnsEMBL::DBSQL::DBConnection', + ); + +has ddl_file => ( + is => 'ro', + isa => 'Str', + builder => '_build_ddl_file', + lazy => 1, + ); + +sub _build_ddl_file { + my ($self) = @_; + + my $class_file = $self->schema_class; + $class_file =~ s/::/-/g; + + my $filename = join('-', $class_file, $self->version, $self->dump_driver); + $filename .= '.sql'; + + return File::Spec->catfile($self->ddl_dir, $filename); +} + +sub run { + my ($self) = @_; + + my $mdb = $self->get_MultiTestDB; + my $dbc = $self->dbc($mdb->get_DBAdaptor($self->db_type)->dbc); + + my $driver = $dbc->driver; + my $check_driver = $self->check_driver; + die "Driver is '$driver' but expected '$check_driver'" unless $driver eq $check_driver; + + $self->make_schema; + $self->create_ddl; + $self->patch_ddl; + + return; +} + +sub get_MultiTestDB { + my ($self) = @_; + my $mdb = Bio::EnsEMBL::Test::MultiTestDB->new($self->species, $self->test_dir, 1); + $mdb->load_database($self->db_type); + $mdb->create_adaptor($self->db_type); + return $mdb; +} + +sub make_schema { + my ($self) = @_; + + my $loader_options = { naming => 'current' }; + $loader_options->{dump_directory} = $self->schema_dir if $self->dump_schema; + + make_schema_at($self->schema_class, $loader_options, [ sub { $self->dbc->db_handle } ]); +} + +sub create_ddl { + my ($self) = @_; + my $schema = $self->connected_schema; + $schema->create_ddl_dir([$self->dump_driver], + '0.1', + $self->ddl_dir, + undef, # pre-version + { add_drop_table => 0 }, + ); +} + +sub patch_ddl { + my ($self) = @_; + my $ddl_file = $self->ddl_file; + my $file = read_file($ddl_file); + $file =~ s/INTEGER PRIMARY KEY/INTEGER PRIMARY KEY AUTOINCREMENT/g; + write_file($ddl_file, $file); + return; +} + +sub connected_schema { + my ($self) = @_; + return $self->schema_class->connect( [ sub { $self->dbc->db_handle } ] ); +} + +no Moose; + +# End of module + +package main; + +my $result = Bio::EnsEMBL::App::DumpTestSchema->new_with_options->run; +exit ($result ? $result : 0); + +# EOF diff --git a/scripts/harness.sh b/scripts/harness.sh new file mode 100755 index 0000000..cbc1c63 --- /dev/null +++ b/scripts/harness.sh @@ -0,0 +1,71 @@ +#!/bin/bash + +join_array() { local d=$1; shift; echo -n "$1"; shift; printf "%s" "${@/#/$d}"; } + +# Find some initial paths, where is this script, +# what is the repos' parent directory, and where +# are our dependencies installed +HARNESS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +PPWD="${PWD}/.." +ENSDIR="${ENSDIR:-$PPWD}" +setenv="$ENSDIR/ensembl/activate" + +# Setup the paths and perl5lib +source $setenv -vv $ENSDIR +if [ "$ENSDIR" != "$PPWD" ]; then + source $setenv -vvd $PWD +fi + +export TEST_AUTHOR=$USER + +# If there's a database configuration for this build type, link +# it in to place +if [ -f "modules/t/MultiTestDB.conf.$DB" ]; then + (cd modules/t && ln -sf MultiTestDB.conf.$DB MultiTestDB.conf) +fi + +# Build the PERL5OPT and SKIP_TESTS based on the environment +MATRIX=( "" "_$DB" "_COVERALLS_$COVERALLS" ) +PERL5OPT_array=() +SKIP_TESTS_array=() + +for h in "${MATRIX[@]}" +do + + PERL5OPT_var="PERL5OPT$h" + if [ ! -z ${!PERL5OPT_var} ]; then + PERL5OPT_array+=(${!PERL5OPT_var}) + fi + + SKIP_TESTS_var="SKIP_TESTS$h" + if [ ! -z ${!SKIP_TESTS_var} ]; then + SKIP_TESTS_array+=(${!SKIP_TESTS_var}) + fi +done + +if [ ${#PERL5OPT_array[@]} -ne 0 ]; then + PERL5OPT=$(join_array ' ' ${PERL5OPT_array[@]}) +# export PERL5OPT + echo "Using PERL5OPT=$PERL5OPT" +fi + +if [ ${#SKIP_TESTS_array[@]} -ne 0 ]; then + SKIP_TESTS='--skip ' + SKIP_TESTS+=$(join_array ',' ${SKIP_TESTS_array[@]}) +fi + +echo "Running test suite" +echo "Executing: perl $ENSDIR/ensembl-test/scripts/runtests.pl modules/t $SKIP_TESTS" +PERL5OPT=$PERL5OPT perl $ENSDIR/ensembl-test/scripts/runtests.pl modules/t $SKIP_TESTS + +rt=$? +if [ $rt -eq 0 ]; then + if [ "$COVERALLS" = 'true' ]; then + unset PERL5OPT + echo "Running Devel::Cover coveralls report" + cover --nosummary -report coveralls + fi + exit $? +else + exit $rt +fi diff --git a/scripts/load_database.pl b/scripts/load_database.pl new file mode 100644 index 0000000..0af18a6 --- /dev/null +++ b/scripts/load_database.pl @@ -0,0 +1,134 @@ +#!/usr/bin/env perl +# Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +use strict; +use warnings; + +use Bio::EnsEMBL::Test::MultiTestDB; +use Getopt::Long; +use Pod::Usage; + +local $ENV{RUNTESTS_HARNESS} = 1; + +sub run { + my ($class) = @_; + my $self = bless({}, 'main'); + $self->args(); + $self->load(); + $self->report_mysql_cmdline(); + $self->report_patch_cmdline(); + $self->report_dumper_cmdline(); + $self->report_mysqladmin_cmdline(); + return; +} + +sub args { + my ($self) = @_; + my $opts = {}; + GetOptions( + $opts, qw/ + curr_dir=s + species=s + type=s + help + man + / + ) or pod2usage(-verbose => 1, -exitval => 1); + pod2usage(-verbose => 1, -exitval => 0) if $opts->{help}; + pod2usage(-verbose => 2, -exitval => 0) if $opts->{man}; + $self->{opts} = $opts; + return; +} + +sub load { + my ($self) = @_; + my $mdb = Bio::EnsEMBL::Test::MultiTestDB->new($self->{opts}->{species}, $self->{opts}->{curr_dir}, 1); + $mdb->load_database($self->{opts}->{type}); + $mdb->create_adaptor($self->{opts}->{type}); + $self->{mdb} = $mdb; + return; +} + +sub report_mysql_cmdline { + my ($self) = @_; + my $dbc = $self->{mdb}->get_DBAdaptor($self->{opts}->{type})->dbc(); + my $password = ($dbc->password()) ? '--password='.$dbc->password() : q{}; + printf "MySQL command line: mysql --host=%s --port=%d --user=%s %s %s\n", + $dbc->host(), $dbc->port(), $dbc->username(), $password, $dbc->dbname(); +} + +sub report_patch_cmdline { + my ($self) = @_; + my $dbc = $self->{mdb}->get_DBAdaptor($self->{opts}->{type})->dbc(); + my $password = ($dbc->password()) ? '--pass '.$dbc->password() : q{}; + printf "Schema Patcher command line: schema_patcher.pl --host %s --port %d --user %s %s --database %s --verbose --fixlast --dryrun\n", + $dbc->host(), $dbc->port(), $dbc->username(), $password, $dbc->dbname(); +} + +sub report_dumper_cmdline { + my ($self) = @_; + my $dbc = $self->{mdb}->get_DBAdaptor($self->{opts}->{type})->dbc(); + my $password = ($dbc->password()) ? '--pass '.$dbc->password() : q{}; + printf "Database dumper command line: dump_mysql.pl --host %s --port %d --user %s %s --database %s --verbose --testcompatible --directory /tmp\n", + $dbc->host(), $dbc->port(), $dbc->username(), $password, $dbc->dbname(); +} + +sub report_mysqladmin_cmdline { + my ($self) = @_; + my $dbc = $self->{mdb}->get_DBAdaptor($self->{opts}->{type})->dbc(); + my $password = ($dbc->password()) ? '--password='.$dbc->password() : q{}; + printf "mysqladmin removal command line: mysqladmin --host=%s --port=%d --user=%s %s drop %s\n", + $dbc->host(), $dbc->port(), $dbc->username(), $password, $dbc->dbname(); +} + + +run(); + +1; +__END__ + +=head1 NAME + + load_database.pl + +=head1 SYNOPSIS + + ./load_database.pl --curr_dir ensembl/modules/t --species homo_sapiens --type core + +=head1 DESCRIPTION + +Attempts to load a test database and to leave it available on the specified +test server for patching and re-dumping. + +=head1 OPTIONS + +=over 8 + +=item B<--curr_dir> + +Current directory. Should be set to the directory which has your configuration files + +=item B<--species> + +Specify the species to load + +=item B<--type> + +Specify the type to load + +=back + +=cut \ No newline at end of file diff --git a/scripts/patch_test_databases.pl b/scripts/patch_test_databases.pl new file mode 100755 index 0000000..a5dacaa --- /dev/null +++ b/scripts/patch_test_databases.pl @@ -0,0 +1,264 @@ +#!/usr/bin/env perl +# Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +use strict; +use warnings; + +use Bio::EnsEMBL::Test::DumpDatabase; +use Bio::EnsEMBL::Test::MultiTestDB; +use File::Spec; +use Cwd; +use File::Basename; +use Getopt::Long; +use Pod::Usage; + +my %skip_species_list; +my %skip_groups_list = map { $_ => 1} qw/web hive/; + +sub run { + my ($class) = @_; + my $self = bless({}, 'main'); + $self->args(); + $self->has_config(); + $self->process(); + $self->cleanup_CLEAN(); + return; +} + +sub args { + my ($self) = @_; + + my $ud = File::Spec->updir(); + my $default_patcher = File::Spec->catdir(dirname(__FILE__), $ud, $ud, 'ensembl', 'misc-scripts', 'schema_patcher.pl'); + + my $opts = { + schema_patcher => $default_patcher, + }; + GetOptions( + $opts, qw/ + curr_dir|current_directory|directory|dir=s + schema_patcher=s + nofixlast! + noquiet! + noverbose! + interactive! + help + man + / + ) or pod2usage(-verbose => 1, -exitval => 1); + pod2usage(-verbose => 1, -exitval => 0) if $opts->{help}; + pod2usage(-verbose => 2, -exitval => 0) if $opts->{man}; + return $self->{opts} = $opts; +} + +sub has_config { + my ($self) = @_; + my $config = File::Spec->catfile($self->{opts}->{curr_dir}, 'MultiTestDB.conf'); + if(! -f $config) { + die "Cannot find a MultiTestDB.conf at '${config}'. Check your --curr_dir command line option"; + } + return; +} + +sub process { + my ($self) = @_; + my $dir = $self->{opts}->{curr_dir}; + my $config = $self->get_config(); + foreach my $species (keys %{$config->{databases}}) { + print STDOUT '='x80; print STDOUT "\n"; + if($skip_species_list{lc($species)}) { + print STDOUT "INFO: Skipping '$species' as it is in the patch ignore list\n"; + next; + } + my $multi = Bio::EnsEMBL::Test::MultiTestDB->new($species, $dir); + foreach my $group (keys %{$config->{databases}->{$species}}) { + if($skip_groups_list{lc($group)}) { + print STDOUT "INFO: Skipping '$group' as it is in the patch ignore list\n"; + next; + } + print STDOUT "INFO: Processing species '$species' and group '$group'\n"; + my $dba = $multi->get_DBAdaptor($group); + my $schema_details = $self->schema_details($dba); + $self->patch_db($dba); + $self->dump_db($dba, $schema_details); + } + $multi = undef; + print STDOUT "INFO: Finished working with species '$species'\n"; + print STDOUT '='x80; print STDOUT "\n"; + } + $self->convert_sqllite($dir); + return; +} + +sub schema_details { + my ($self, $dba) = @_; + my $h = $dba->dbc()->sql_helper(); + my $tables_sql = q{select TABLE_NAME, TABLE_TYPE from information_schema.TABLES where TABLE_SCHEMA = DATABASE()}; + my $tables = $h->execute(-SQL => $tables_sql); + my %details; + foreach my $t (@{$tables}) { + my ($table_name, $table_type) = @{$t}; + + my $checksum_sql = sprintf('CHECKSUM TABLE `%s`', $table_name); + my $checksum = $h->execute(-SQL => $checksum_sql); + + my $create_sql = sprintf('SHOW CREATE TABLE `%s`', $table_name); + my $create = $h->execute(-SQL => $create_sql); + + $details{$table_name} = { + is_table => ($table_type eq 'BASE TABLE' ? 1 : 0), + checksum => $checksum->[0]->[1], + create => $create->[0]->[1], + }; + } + return \%details; +} + +sub patch_db { + my ($self, $dba) = @_; + my $dbc = $dba->dbc(); + my %args_hash = ( + host => $dbc->host(), + port => $dbc->port(), + user => $dbc->username(), + database => $dbc->dbname(), + ); + $args_hash{pass} = $dbc->password() if $dbc->password(); + my @args = map { "-${_} ".$args_hash{$_} } keys %args_hash; + + my $program = $self->{opts}->{schema_patcher}; + my $nofixlast = $self->{opts}->{nofixlast}; + if (!$nofixlast) { push @args, "-fixlast"; } + my $noverbose = $self->{opts}->{noverbose}; + if (!$noverbose) { push @args, "-verbose"; } + my $noquiet = $self->{opts}->{noquiet}; + if (!$noquiet) { push @args, "-quiet"; } + my $interactive = $self->{opts}->{interactive}; + if (!$interactive) { push @args, "-nointeractive" ; } + my $arguments = join(q{ }, @args); + my $cmd = "$program $arguments"; + print STDERR "DEBUG: Submitting command '$cmd'\n"; + my $output = `$cmd`; + print STDERR $output; + my $rc = $? << 8; + if($rc != 0) { + die "Not good! The patch command did not succeed"; + } + return; +} + +sub dump_db { + my ($self, $dba, $old_schema_details) = @_; + my $new_schema_details = $self->schema_details($dba); + my $dir = Bio::EnsEMBL::Test::MultiTestDB->base_dump_dir($self->{opts}->{curr_dir}); + print STDERR "Will dump database to root of $dir\n"; + my $dumper = Bio::EnsEMBL::Test::DumpDatabase->new($dba, $dir, $old_schema_details, $new_schema_details); + $dumper->dump(); + return; +} + +sub convert_sqllite { + my ($self, $dir) = @_; + my $ud = File::Spec->updir(); + my $schema_converter = File::Spec->catdir(dirname(__FILE__), 'convert_test_schemas.sh'); + my $cwd = getcwd(); + my $is_absolute = File::Spec->file_name_is_absolute( $self->{opts}->{curr_dir}); + my $curr_dir; + if ($is_absolute) { + $curr_dir = File::Spec->catdir($self->{opts}->{curr_dir}); + } else { + $curr_dir = File::Spec->catdir($cwd, $self->{opts}->{curr_dir} ) ; + } + if ($curr_dir !~ /ensembl\/modules\/t/) { return; } + eval "require MooseX::App::Simple"; + system("$schema_converter $curr_dir") unless ($@); +} + +sub cleanup_CLEAN { + my ($self) = @_; + my $clean_test = File::Spec->catfile($self->{opts}->{curr_dir}, 'CLEAN.t'); + if(-f $clean_test) { + unlink $clean_test; + } + return; +} + +sub get_config { + my ($self) = @_; + my $dir = $self->{opts}->{curr_dir}; + return Bio::EnsEMBL::Test::MultiTestDB->get_db_conf($dir); +} + +run(); + +1; +__END__ + +=head1 NAME + + patch_test_databases.pl + +=head1 SYNOPSIS + + ./patch_test_databases.pl --curr_dir ensembl/modules/t [--schema_patcher PATCHER] + +=head1 DESCRIPTION + +For a given directory where tests are normally run (one with a +MultiTestDB.conf) this code will iterate through all available databases, +load them into the target database server, run schema_patcher.pl and then +redump into a single SQL file & multiple txt files. The code will also +ensure that redundant table dumps are cleaned up and will only initate a dump +when a data point has changed. + +=head1 OPTIONS + +=over 8 + +=item B<--curr_dir> + +Current directory. Should be set to the directory which has your configuration files + +=item B<--schema_patcher> + +Specify the location of the schema patcher script to use. If not specified we +assume a location of + +=item B<--nofixlast> + +Default schema_patcher option is to use fixlast. +With nofixlast option enabled, it will run for all known patches + +=item B<--noquiet> + +Default schema_patcher option is to use quiet, to hide warnings +With noquiet option enabled, warnings will be displayed + +=item B<--noverbose> + +Default schema_patcher option is to use verbose, to display extra information +With noverbose option enabled, the script is less verbose + +=item B<--interactive> + +Default schema_patcher option is to use nointeractive, for an non-interactive environment +With interactive option enabled, the script will require user input + + dirname(__FILE__)/../../ensembl/misc-scripts/schema_patcher.pl + +=back + +=cut diff --git a/scripts/runtests.pl b/scripts/runtests.pl new file mode 100755 index 0000000..4f394ce --- /dev/null +++ b/scripts/runtests.pl @@ -0,0 +1,195 @@ +#!/usr/bin/env perl +# Copyright [1999-2016] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +use strict; +use warnings; + +use File::Basename; +use File::Find; +use File::Spec; +use Getopt::Long; +use TAP::Harness; + +my $opts = { + clean => 0, + help => 0, + skip => [], + verbose => 0 +}; +my @args = ('clean|clear|c', 'help|h', 'verbose|v', 'list|tests|list-tests|l', 'skip=s@'); + +my $parse = GetOptions($opts, @args); +if(!$parse) { + print STDERR "Could not parse the given arguments. Please consult the help\n"; + usage(); + exit 1; +} + +# If we were not given a directory as an argument, assume current directory +push(@ARGV, File::Spec->curdir()) if ! @ARGV; + +# Print usage on '-h' command line option +if ($opts->{help}) { + usage(); + exit; +} + +# Get the tests +my $input_files_directories = [@ARGV]; +my @tests = eval { + get_all_tests($input_files_directories); +}; +if($@) { + printf(STDERR "Could not continue processing due to error: %s\n", $@); + exit 1; +} + +#Tests without cleans +my @no_clean_tests = sort grep { $_ !~ /CLEAN\.t$/ } @tests; + +if (@{$opts->{skip}}) { + my %skip = map { basename($_) => 1 } split(/,/, join(',', @{$opts->{skip}})); + printf STDERR "Skipping tests: %s\n", join(', ', sort keys %skip); + @no_clean_tests = grep { not $skip{basename($_)} } @no_clean_tests; +} + +# List test files on '-l' command line option +if ($opts->{list}) { + print "$_\n" for @no_clean_tests; + exit; +} + +# Make sure proper cleanup is done if the user interrupts the tests +$SIG{'HUP'} = $SIG{'KILL'} = $SIG{'INT'} = sub { + warn "\n\nINTERRUPT SIGNAL RECEIVED\n\n"; + clean(); + exit; +}; + +# Harness +my $harness = TAP::Harness->new({verbosity => $opts->{verbose}}); + +# Set environment variables +$ENV{'RUNTESTS_HARNESS'} = 1; + +# Run all specified tests +my $results; +eval { + $results = $harness->runtests(@no_clean_tests); +}; + +clean(); + +if($results->has_errors()) { + my $count = $results->failed(); + $count += $results->parse_errors(); + $count += $results->exit(); + $count += $results->wait(); + $count = 255 if $count > 255; + exit $count; +} + +sub usage { + print <<EOT; +Usage: +\t$0 [-c] [-v] [<test files or directories> ...] +\t$0 -l [<test files or directories> ...] +\t$0 -h + +\t-l|--list|--tests|--list-tests\n\t\tlist available tests +\t-c|--clean|--clear\n\t\trun tests and clean up in each directory +\t\tvisited (default is not to clean up) +\t--skip <test_name>[,<test_name>...]\n\t\tskip listed tests +\t-v|--verbose\n\t\tbe verbose +\t-h|--help\n\t\tdisplay this help text + +If no directory or test file is given on the command line, the script +will assume the current directory. +EOT +} + +=head2 get_all_tests + + Description: Returns a list of testfiles in the directories specified by + the @tests argument. The relative path is given as well as + with the testnames returned. Only files ending with .t are + returned. Subdirectories are recursively entered and the test + files returned within them are returned as well. + Returntype : listref of strings. + Exceptions : none + Caller : general + +=cut + +sub get_all_tests { + my @files; + my @out; + + #If we had files use them + if ( $input_files_directories && @{$input_files_directories} ) { + @files = @{$input_files_directories}; + } + #Otherwise use current directory + else { + push(@files, File::Spec->curdir()); + } + + my $is_test = sub { + my ($suspect_file) = @_; + return 0 unless $suspect_file =~ /\.t$/; + if(! -f $suspect_file) { + warn "Cannot find file '$suspect_file'"; + } + elsif(! -r $suspect_file) { + warn "Cannot read file '$suspect_file'"; + } + return 1; + }; + + while (my $file = shift @files) { + #If it was a directory use it as a point to search from + if(-d $file) { + my $dir = $file; + #find cd's to the dir in question so use relative for tests + find(sub { + if( $_ ne '.' && $_ ne '..' && $_ ne 'CVS') { + if($is_test->($_)) { + push(@out, $File::Find::name); + } + } + }, $dir); + } + #Otherwise add it if it was a test + else { + push(@out, $file) if $is_test->($file); + } + } + + return @out; +} + +sub clean { + # Unset environment variable indicating final cleanup should be + # performed + delete $ENV{'RUNTESTS_HARNESS'}; + if($opts->{clean}) { + my @new_tests = get_all_tests(); + my @clean_tests = grep { $_ =~ /CLEAN\.t$/ } @new_tests; + eval { $harness->runtests(@clean_tests); }; + warn $@ if $@; + } + return; +} -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ensembl-test.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
