This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository gubbins.
commit 63f5943e24e8a4818a457721343347e0ceaea01d Author: Andreas Tille <[email protected]> Date: Fri Aug 5 23:46:56 2016 +0200 Imported Upstream version 2.1.0 --- CHANGELOG | 5 ++ INSTALL.md | 2 + VERSION | 2 +- python/gubbins/RAxMLExecutable.py | 12 +++- python/gubbins/common.py | 2 +- .../input_alignment.fasta | 2 +- python/gubbins/tests/test_external_dependancies.py | 1 + python/scripts/run_gubbins.py | 2 + src/branch_sequences.c | 83 +++++++--------------- src/branch_sequences.h | 1 - src/string_cat.c | 9 +-- tests/check_branch_sequences.c | 18 ++--- 12 files changed, 53 insertions(+), 86 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 69f2409..223b398 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,8 @@ +v2.1.0 - 22 July 2016 +------ +Use GTRCAT model by default in RAxML instead of GTRGAMMA (massive speedup). +C code optimisations in Gubbins. + v2.0.0 - 26 May 2016 ------ Reconstruct internal sequences by default using RAxML rather than fastML. diff --git a/INSTALL.md b/INSTALL.md index 41e524f..f3de67b 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -97,6 +97,8 @@ autoreconf -i ./configure make sudo make install +cd python +sudo python3 setup.py install ``` ## OSX/Linux/Windows - Virtual Machine diff --git a/VERSION b/VERSION index 227cea2..7ec1d6d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.0 +2.1.0 diff --git a/python/gubbins/RAxMLExecutable.py b/python/gubbins/RAxMLExecutable.py index a27188b..2401a32 100644 --- a/python/gubbins/RAxMLExecutable.py +++ b/python/gubbins/RAxMLExecutable.py @@ -23,18 +23,24 @@ import subprocess import re class RAxMLExecutable(object): - def __init__(self, threads, verbose = False ): + def __init__(self, threads, model = 'GTRCAT', verbose = False ): self.verbose = verbose self.threads = threads self.single_threaded_executables = ['raxmlHPC-AVX','raxmlHPC-SSE3','raxmlHPC'] self.multi_threaded_executables = ['raxmlHPC-PTHREADS-AVX','raxmlHPC-PTHREADS-SSE3','raxmlHPC-PTHREADS'] + self.model = model self.raxml_executable = self.select_executable_based_on_threads() - self.tree_building_parameters = ' -f d -p 1 -m GTRGAMMA ' + self.tree_building_parameters_gtrgamma = ' -f d -p 1 -m GTRGAMMA ' + self.tree_building_parameters_gtrcat = ' -f d -p 1 -m GTRCAT -V ' self.internal_sequence_parameters = ' -f A -p 1 -m GTRGAMMA ' def tree_building_command(self): - command = self.raxml_executable + self.threads_parameter() + self.tree_building_parameters + tree_building_parameters = self.tree_building_parameters_gtrcat + if self.model == 'GTRGAMMA': + tree_building_parameters =self.tree_building_parameters_gtrgamma + + command = self.raxml_executable + self.threads_parameter() + tree_building_parameters if self.verbose: print("Tree building command: "+command) return command diff --git a/python/gubbins/common.py b/python/gubbins/common.py index e1ae21d..b1f2e4e 100644 --- a/python/gubbins/common.py +++ b/python/gubbins/common.py @@ -102,7 +102,7 @@ class GubbinsCommon(): def parse_and_run(self): # Default parameters - raxml_executable_obj = RAxMLExecutable(self.args.threads, self.args.verbose) + raxml_executable_obj = RAxMLExecutable(self.args.threads, self.args.raxml_model, self.args.verbose) fasttree_executables = ['FastTree','fasttree'] FASTTREE_EXEC = GubbinsCommon.choose_executable(fasttree_executables) diff --git a/python/gubbins/tests/data/raxml_sequence_reconstruction/input_alignment.fasta b/python/gubbins/tests/data/raxml_sequence_reconstruction/input_alignment.fasta index 6d75ec7..2367ec0 100644 --- a/python/gubbins/tests/data/raxml_sequence_reconstruction/input_alignment.fasta +++ b/python/gubbins/tests/data/raxml_sequence_reconstruction/input_alignment.fasta @@ -9,4 +9,4 @@ CCCTT >E CCTTT >F -CCGGG \ No newline at end of file +CCGGG diff --git a/python/gubbins/tests/test_external_dependancies.py b/python/gubbins/tests/test_external_dependancies.py index ef03d04..6066171 100644 --- a/python/gubbins/tests/test_external_dependancies.py +++ b/python/gubbins/tests/test_external_dependancies.py @@ -212,6 +212,7 @@ class TestExternalDependancies(unittest.TestCase): parser.add_argument('--threads', '-c', help='Number of threads to run with RAXML, but only if a PTHREADS version is available', type=int, default = 1) parser.add_argument('--converge_method', '-z', help='Criteria to use to know when to halt iterations [weighted_robinson_foulds|robinson_foulds|recombination]', default = 'weighted_robinson_foulds') parser.add_argument('--version', action='version', version=str(pkg_resources.get_distribution("gubbins").version)) + parser.add_argument('--raxml_model', '-r', help='RAxML model [GTRGAMMA|GTRCAT], default GTRCAT', default = 'GTRCAT') return parser def default_arg_parse(self): diff --git a/python/scripts/run_gubbins.py b/python/scripts/run_gubbins.py index 592a64d..35fc07c 100755 --- a/python/scripts/run_gubbins.py +++ b/python/scripts/run_gubbins.py @@ -21,6 +21,7 @@ import sys sys.path.append(".") +sys.path.append("..") import argparse import pkg_resources from gubbins import common @@ -44,6 +45,7 @@ parser.add_argument('--converge_method', '-z', help='Criteria to use to know wh parser.add_argument('--version', action='version', version=str(pkg_resources.get_distribution("gubbins").version)) parser.add_argument('--min_window_size', '-a', help='Minimum window size, default 100', type=int, default = 100) parser.add_argument('--max_window_size', '-b', help='Maximum window size, default 10000', type=int, default = 10000) +parser.add_argument('--raxml_model', '-r', help='RAxML model [GTRGAMMA|GTRCAT], default GTRCAT', default = 'GTRCAT') gubbins_runner = common.GubbinsCommon(parser.parse_args()) gubbins_runner.parse_and_run() diff --git a/src/branch_sequences.c b/src/branch_sequences.c index fcdbb2b..a7989e1 100644 --- a/src/branch_sequences.c +++ b/src/branch_sequences.c @@ -78,37 +78,31 @@ int get_list_of_snp_indices_which_fall_in_downstream_recombinations(int ** curre { int num_snps_in_recombinations =0; int i = 0; + + // loop over each block for(i = 0; i<num_blocks; i++ ) { int current_index = 0; + // convert the starting coordinates of block to the nearest SNP index current_index = find_starting_index(current_block_coordinates[0][i],snp_locations,0, number_of_snps); - int j; - for(j = current_index; (j < number_of_snps && snp_locations[j] <= current_block_coordinates[1][i]); j++) + //make sure that the index begins at start of block + int beginning_j = current_index; + for(beginning_j = current_index; snp_locations[beginning_j] < current_block_coordinates[0][i];beginning_j++) + { + } + + int j; + // starting at the begining index of block, count all the snps until the end of the bock. + for(j = beginning_j; (j < number_of_snps && snp_locations[j] <= current_block_coordinates[1][i]); j++) { - if(snp_locations[j] >= current_block_coordinates[0][i] && snp_locations[j] <= current_block_coordinates[1][i]) - { - int k = 0; - int seen_before = 0; - // has this snp index been flagged before? - for(k =0; k < num_snps_in_recombinations; k++) - { - if(snps_in_recombinations[k] == j) - { - seen_before = 1; - break; - } - } - if(seen_before == 0) - { - snps_in_recombinations[num_snps_in_recombinations] = j; - num_snps_in_recombinations++; - } - } + snps_in_recombinations[num_snps_in_recombinations] = j; + num_snps_in_recombinations++; } } + + // may contain duplications return num_snps_in_recombinations; - } @@ -573,11 +567,6 @@ int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,i // Set up the window counter with 1 value per base in the branch int * window_count; window_count = (int *) calloc((genome_size+1),sizeof(int)); - int i; - for(i =0; i< genome_size; i++) - { - window_count[i] = 0; - } // Integer array with location of gaps int * gaps_in_original_genome_space; @@ -626,7 +615,7 @@ int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,i int in_block = 0; int block_lower_bound = 0; // Scan across the pileup and record where blocks are above the cutoff - + int i; for(i = 0; i < genome_size; i++) { // Just entered the start of a block @@ -1043,12 +1032,14 @@ int calculate_genome_length_excluding_blocks_and_gaps(char * sequence, int lengt int * bases_to_be_excluded; bases_to_be_excluded = (int*) calloc((length_of_sequence + 1),sizeof(int)); + int genome_length = length_of_sequence; int i = 0; for(i = 0; i<length_of_sequence; i++) { if(sequence[i] == 'N' || sequence[i] == '-' ) { bases_to_be_excluded[i] = 1; + genome_length--; } } @@ -1064,41 +1055,15 @@ int calculate_genome_length_excluding_blocks_and_gaps(char * sequence, int lengt int block_index = 0; for(block_index = block_coordinates[0][j]; block_index <= block_coordinates[1][j]; block_index++ ) { - bases_to_be_excluded[block_index-1] = 1; + if(bases_to_be_excluded[block_index-1] == 0) + { + bases_to_be_excluded[block_index-1] = 1; + genome_length--; + } } } - int genome_length = 0; - for(i = 0; i<length_of_sequence; i++) - { - if(bases_to_be_excluded[i] == 0 ) - { - genome_length++; - } - } return genome_length; } - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/branch_sequences.h b/src/branch_sequences.h index 64a3591..c8f6847 100644 --- a/src/branch_sequences.h +++ b/src/branch_sequences.h @@ -47,7 +47,6 @@ int get_list_of_snp_indices_which_fall_in_downstream_recombinations(int ** curre int calculate_genome_length_excluding_blocks_and_gaps(char * sequence, int length_of_sequence, int ** block_coordinates, int num_blocks); - #define WINDOW_SNP_MODE_TARGET 10 #define RANDOMNESS_DAMPNER 0.05 #define MAX_SAMPLE_NAME_SIZE 1024 diff --git a/src/string_cat.c b/src/string_cat.c index 50b3df3..17f3d5b 100644 --- a/src/string_cat.c +++ b/src/string_cat.c @@ -21,16 +21,9 @@ #include <stdlib.h> #include <string.h> - int size_of_string(char *input_string) { - int i = 0; - - while( input_string[i] != '\0') - { - i++; - } - return i; + return strlen(input_string); } void concat_strings_created_with_malloc(char *input_string, char *string_to_concat) diff --git a/tests/check_branch_sequences.c b/tests/check_branch_sequences.c index b6f8c90..78b9eca 100644 --- a/tests/check_branch_sequences.c +++ b/tests/check_branch_sequences.c @@ -153,22 +153,19 @@ START_TEST (check_get_list_of_snp_indices_which_fall_in_downstream_recombination { int ** block_coords; block_coords = (int **) malloc(2*sizeof(int*)); - block_coords[0] = (int*) malloc((4)*sizeof(int )); - block_coords[1] = (int*) malloc((4)*sizeof(int )); + block_coords[0] = (int*) malloc((2)*sizeof(int )); + block_coords[1] = (int*) malloc((2)*sizeof(int )); block_coords[0][0] = 5; block_coords[1][0] = 10; block_coords[0][1] = 30; block_coords[1][1] = 35; - block_coords[0][2] = 20; - block_coords[1][2] = 25; - block_coords[0][3] = 7; - block_coords[1][3] = 15; + int snp_locations[16] = {1,4,5,6,7,10,11,15,19,20,29,30,35,36,40,50}; - int * snps_in_recombinations = (int *) calloc((16 +1),sizeof(int)); + int * snps_in_recombinations = (int *) calloc((16 +1),sizeof(int)); int num_snps_in_recombinations = 0; - num_snps_in_recombinations = get_list_of_snp_indices_which_fall_in_downstream_recombinations(block_coords,4,snp_locations,16, snps_in_recombinations); - fail_unless(num_snps_in_recombinations == 9); + num_snps_in_recombinations = get_list_of_snp_indices_which_fall_in_downstream_recombinations(block_coords,2,snp_locations,16, snps_in_recombinations); + fail_unless(num_snps_in_recombinations == 6); fail_unless(snps_in_recombinations[0] == 2); fail_unless(snps_in_recombinations[1] == 3); @@ -176,9 +173,6 @@ START_TEST (check_get_list_of_snp_indices_which_fall_in_downstream_recombination fail_unless(snps_in_recombinations[3] == 5); fail_unless(snps_in_recombinations[4] == 11); fail_unless(snps_in_recombinations[5] == 12); - fail_unless(snps_in_recombinations[6] == 9); - fail_unless(snps_in_recombinations[7] == 6); - fail_unless(snps_in_recombinations[8] == 7); } END_TEST -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gubbins.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
