Dear members, I have a problem with a perl script pasted below. Since it is not a short script I was reluctant to paste all of it but then I thought it may be easier to understand if I pasted all of it. I greatly welcome any recommendations to refine the script (I predict many of you will have something to say about it), but my main concern is where it is going wrong? Below is part of the output, A and B. Snip A is ok but lines in snip B starting with "B: nomatch>" are wrong, which is why half of these lines are empty. Lines with "nomatch>" are expected but on manual inspection some are incorrectly labelled as such, including some of the B: lines I have here. I have included some parts of the input files after the script. Since it is a lot to take in I thank you all in advance for giving it a shot of any sort. I have tried but I cannot get my head round where I have made the mistake(s).
Cheers. Aim. __OUTPUT START__ A: CLUSTALFILE> CDSs_Contig_File_2 A: ismatch> M63553.1 1 1.000 0.957 55 70 FPrintScan 3.9e-04 frame f1 <=> frame f3 335565.2 5 66 81 FPrintScan 5.3e-04 -- A: ismatch> M63553.1 2 1.000 0.946 84 96 FPrintScan 4.7e-04 frame f1 <=> frame f3 335565.2 6 95 107 FPrintScan 6.3e-04 -- A: framehits> 0 0 7 0.000 0.000 1.000 1.000 7 B: CLUSTALFILE> CDSs_Contig_File_1 B: nomatch> X64146.1 1 1.000 0.000 16 40 FPrintScan 8.0e-12 B: nomatch> X64146.1 2 1.000 0.000 41 70 FPrintScan 8.0e-12 B: nomatch> X64146.1 3 1.000 0.000 71 100 FPrintScan 8.0e-12 B: nomatch> X64146.1 4 1.000 0.000 101 130 FPrintScan 8.0e-12 B: nomatch> X64146.1 5 1.000 0.000 146 165 FPrintScan 5.6e-08 B: ismatch> X64146.1 6 1.000 0.998 11 170 HMMPfam 8.6e-107 frame f1 <=> frame f2 335884.1 1 59 218 HMMPfam 8.6e-107 B: ismatch> X64146.1 7 1.000 0.997 11 118 HMMSmart 7.0e-77 frame f1 <=> frame f2 335884.1 2 59 166 HMMSmart 7.0e-77 __OUTPUT END__ __SCRIPT START__ !/usr/bin/perl -w use strict; ( @ARGV > 0 ) or die "usage: ????? {in-file}\n"; my @clust_file = @ARGV; my ($cds,$nc,$contig,$is_matched,$nhits,$coverage); my %cdscount = (); my %cds_frame = (); my %cds_db = (); my %cds_start = (); my %cds_end = (); my %cds_score = (); my %contigcount = (); my %contig_frame = (); my %contig_db = (); my %contig_start = (); my %contig_end = (); my %contig_score = (); my %apos1 = (); my %apos2 = (); # # read in cds data first # my $CDS_file = "File_A.data"; open(CDS, $CDS_file) or die "DEBUG2: unable to open file $CDS_file: $! \n"; while ( <CDS> ) { if ( /(^\S+\.\d+)\.(f\d)\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s/ ) { $cds = $1; $cdscount{$cds}++; $nc=$cdscount{$cds}; $cds_frame{$cds}[$nc] = $2; $cds_db{$cds}[$nc] = $3; $cds_start{$cds}[$nc] = 3*$4; # convert amino acid to nucleotide positions $cds_end{$cds}[$nc] = 3*$5; # convert amino acid to nucleotide positions $cds_score{$cds}[$nc] = ScoreFix($6); } } close(CDS); # read in contig data my $CONTIG_file = "File_B.data"; open(CONTIG, $CONTIG_file) or die "DEBUG2: unable to open file $CONTIG_file: $! \n"; while ( <CONTIG> ) { if ( /(^\S+\.\d+)\.(f\d)\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s/ ) { $contig = $1; $contigcount{$contig}++; $nc=$contigcount{$contig}; $contig_frame{$contig}[$nc] = $2; $contig_db{$contig}[$nc] = $3; $contig_start{$contig}[$nc] = 3*$4; # convert amino acid to nucleotide positions $contig_end{$contig}[$nc] = 3*$5; # convert amino acid to nucleotide positions $contig_score{$contig}[$nc] = ScoreFix($6); } } close(CONTIG); # read clustalw data one file at a time my ($key,$seq,$contigname,$cdsname); foreach my $file ( @clust_file ) { open(CLUSTAL, $file) or die "DEBUG1: unable to open file $file: $! \n"; print "CLUSTALFILE> $file\n"; $contig = ""; $cds = ""; while ( <CLUSTAL> ) { next if ( /^\s*$/ || /\*/ || /^CLUST/); chomp; if (/^(\S+)\s+(\S+)/) { #seq1# $key = $1; $seq = $2; if ( $key =~ /^\d/ ) { $contig .= $seq; $contigname = $key; } else { $cds .= $seq; $cdsname = $key; } } } close(CLUSTAL); # create indexing arrays my $rpos1 = 0; my $rpos2 = 0; my ($a,$b); foreach my $pos (1..length $contig) { $a = substr($cds,$pos-1,1); $b = substr($contig,$pos-1,1); $rpos1++ if ( $a ne '-' ); $rpos2++ if ( $b ne '-' ); $apos1{$rpos1}=$pos; $apos2{$rpos2}=$pos; } my %fh = (); my $th = 0; $fh{'f1'} = 0; $fh{'f2'} = 0; $fh{'f3'} = 0; # CDS point of view my ($s1,$e1,$s2,$e2,$overlap,$mh); if ( defined $cdscount{$cdsname} ) { foreach $nhits ( 1..$cdscount{$cdsname} ) { if ( $cds_frame{$cdsname}[$nhits] eq "f1" ) { $is_matched = 0; $s1 = $cds_start{$cdsname}[$nhits]; $e1 = $cds_end{$cdsname}[$nhits]; if ( ! defined $contigcount{$contigname} ) { #value if contig not in have IPR hit# if ( $cds_db{$cdsname}[$nhits] ne "Coil" ) { $coverage = GetCoverage($s1,$e1,$contig); printf "nomatch> %-10.10s %3d %5.3f %5.3f %4d %4d %-12.12s %8.1e contig %s has no hits\n", $cdsname,$nhits,$coverage,0.0,$s1/3,$e1/3,$cds_db{$cdsname}[$nhits], $cds_score{$cdsname}[$nhits],$contigname; } next; } foreach my $mhits ( 1..$contigcount{$contigname} ) { if ( $cds_db{$cdsname}[$nhits] eq $contig_db{$contigname}[$mhits] ) { $s2 = $contig_start{$contigname}[$mhits]; $e2 = $contig_end{$contigname}[$mhits]; $overlap = GetOverlap($s1,$e1,$s2,$e2); $coverage = GetCoverage($s1,$e1,$contig); if ( $overlap > 0.7 ) { $is_matched = 1; printf "ismatch> %-10.10s %3d %5.3f %5.3f %4d %4d %-12.12s %8.1e ", $cdsname,$nhits,$coverage,$overlap,$s1/3,$e1/3, $cds_db{$cdsname}[$nhits],$cds_score{$cdsname}[$nhits]; printf " frame f1 <=> frame %2s %-10.10s %3d %4d %4d %-12.12s %8.1e ", $contig_frame{$contigname}[$mhits],$contigname,$mhits,$s2/3,$e2/3, $contig_db{$contigname}[$mhits],$contig_score{$contigname}[$mhits]; if ( $contig_score{$contigname}[$mhits] > $cds_score{$cdsname}[$nhits] ) { print " -- \n"; } elsif ($contig_score{$contigname}[$mhits] < $cds_score{$cdsname}[$nhits] ) { print " ++ \n"; } else { print "\n"; } $fh{$contig_frame{$contigname}[$mhits]}++; $th++; $mhits = $contigcount{$contigname}; } } } if ( ! $is_matched && $cds_db{$cdsname}[$nhits] ne "Coil" ) { printf "nomatch> %-10.10s %3d %5.3f %5.3f %4d %4d %-12.12s %8.1e %-10.10s\n", $cdsname,$nhits,$coverage,0.0,$s1/3,$e1/3, $cds_db{$cdsname}[$nhits],$cds_score{$cdsname}[$nhits],$contigname; } } } } else { print "nothit> $cdsname has no hits \n"; } $mh = ( $fh{'f1'} > $fh{'f2'} ) ? $fh{'f1'} : $fh{'f2'}; $mh = ( $mh > $fh{"f3"} ) ? $mh : $fh{"f3"}; if ( $th > 0 ) { printf "framehits> %3d %3d %3d %5.3f %5.3f %5.3f %5.3f %4d \n", $fh{"f1"},$fh{"f2"},$fh{"f3"},$fh{"f1"}/$th,$fh{"f2"}/$th,$fh{"f3"}/$th,$mh/$th,$th; } } exit; sub GetCoverage { my ($s, $e, $contig) = @_; my ($n, $m); $n = $m = 0; for ( my $i=$s; $i<=$e; $i++) { $n++; $m++ if ( substr($contig,$i,1) ne '-' ); } return $m/$n; } sub GetOverlap { my ($s1,$e1,$s2,$e2) = @_; my ($ns1, $ns2, $ne1, $ne2); $ns1 = $apos1{$s1}; $ne1 = $apos1{$e1}; $ns2 = $apos2{$s2}; $ne2 = $apos2{$e2}; my $smax = ($ns1 > $ns2) ? $ns1 : $ns2; my $emin = ($ne1 > $ne2) ? $ne2 : $ne1; my $overlap = ($emin - $smax + 1)/($e1 - $s1 + 1); return $overlap; } sub ScoreFix { my $s = shift; $s =~ s/^e/1e/; return $s; } __SCRIPT END___ __File_A.data START __ M63553.1.f1 0169C0C2E528E25B 208 FPrintScan PR00153 CSAPPISMRASE 55 70 0.000392 T 11-Dec-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) M63553.1.f1 0169C0C2E528E25B 208 FPrintScan PR00153 CSAPPISMRASE 84 96 0.0004696 T 11-Dec-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) M63553.1.f1 0169C0C2E528E25B 208 FPrintScan PR00153 CSAPPISMRASE 127 142 2.184e-05 T 11-Dec-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) M63553.1.f1 0169C0C2E528E25B 208 FPrintScan PR00153 CSAPPISMRASE 142 154 0.00264 T 11-Dec-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) M63553.1.f1 0169C0C2E528E25B 208 FPrintScan PR00153 CSAPPISMRASE 155 170 2.464e-06 T 11-Dec-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) M63553.1.f1 0169C0C2E528E25B 208 HMMPfam PF00160 pro_isomerase 36 197 1.7e-114 T 11-Dec-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) M63553.1.f1 0169C0C2E528E25B 208 ProfileScan PS50072 CSA_PPIASE_2 38 195 47.499 T 11-Dec-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) M63553.1.f2 D4C09FEDAE405814 207 ProfileScan PS50323 ARG_RICH 2 65 9.497 T 11-Dec-2002 NULL NULL M63553.1.f3 23501CA3B611B62A 207 ProfileScan PS50323 ARG_RICH 19 139 10.014 T 11-Dec-2002 NULL NULL M63553.1.f3 23501CA3B611B62A 207 ProfileScan PS50315 GLY_RICH 2 173 10.279 T 11-Dec-2002 NULL NULL X64146.1.f1 89FA4BA1BFCBCEA2 171 FPrintScan PR00310 ANTIPRLFBTG1 16 40 8e-12 T 12-Dec-2002 IPR002087 Anti-proliferative protein X64146.1.f1 89FA4BA1BFCBCEA2 171 FPrintScan PR00310 ANTIPRLFBTG1 41 70 8e-12 T 12-Dec-2002 IPR002087 Anti-proliferative protein X64146.1.f1 89FA4BA1BFCBCEA2 171 FPrintScan PR00310 ANTIPRLFBTG1 71 100 8e-12 T 12-Dec-2002 IPR002087 Anti-proliferative protein X64146.1.f1 89FA4BA1BFCBCEA2 171 FPrintScan PR00310 ANTIPRLFBTG1 101 130 8e-12 T 12-Dec-2002 IPR002087 Anti-proliferative protein X64146.1.f1 89FA4BA1BFCBCEA2 171 FPrintScan PR00310 ANTIPRLFBTG1 146 165 5.584e-08 T 12-Dec-2002 IPR002087 Anti-proliferative protein X64146.1.f1 89FA4BA1BFCBCEA2 171 HMMPfam PF01211 Anti_proliferat 11 170 8.6e-107 T 12-Dec-2002 IPR002087 Anti-proliferative protein X64146.1.f1 89FA4BA1BFCBCEA2 171 HMMSmart SM00099 btg1 11 118 7e-77 T 12-Dec-2002 IPR002087 Anti-proliferative protein __File_A.data END__ __File_B.data START__ 335565.2.f3 B0DC0087DC4FBE8B 304 HMMPfam PF00160 pro_isomerase 47 208 4.2e-115 T 25-Jul-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) 335565.2.f3 B0DC0087DC4FBE8B 304 FPrintScan PR00153 CSAPPISMRASE 138 153 2.944e-05 T 25-Jul-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) 335565.2.f3 B0DC0087DC4FBE8B 304 FPrintScan PR00153 CSAPPISMRASE 153 165 0.001728 T 25-Jul-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) 335565.2.f3 B0DC0087DC4FBE8B 304 FPrintScan PR00153 CSAPPISMRASE 166 181 3.32e-06 T 25-Jul-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) 335565.2.f3 B0DC0087DC4FBE8B 304 FPrintScan PR00153 CSAPPISMRASE 66 81 0.0005272 T 25-Jul-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) 335565.2.f3 B0DC0087DC4FBE8B 304 FPrintScan PR00153 CSAPPISMRASE 95 107 0.000632 T 25-Jul-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) 335565.2.f1 A58984EC37123726 304 ProfileScan PS50323 ARG_RICH 3 77 9.670 T 25-Jul-2002 NULL NULL 335565.2.f2 22ED3D4C8B838162 304 ProfileScan PS50323 ARG_RICH 31 151 10.014 T 25-Jul-2002 NULL NULL 335565.2.f3 B0DC0087DC4FBE8B 304 ProfileScan PS50072 CSA_PPIASE_2 49 206 47.731 T 25-Jul-2002 IPR002130 Cyclophilin-type peptidyl-prolyl cis-trans isomerase Molecular Function: peptidylprolyl isomerase (GO:0003755), Cellular Component: cytoplasm (GO:0005737), Biological Process: protein folding (GO:0006457) 335565.2.f2 22ED3D4C8B838162 304 ProfileScan PS50315 GLY_RICH 14 113 9.456 T 25-Jul-2002 NULL NULL 335884.1.f2 269B66FBEF17C478 527 HMMPfam PF01211 Anti_proliferat 59 218 8.6e-107 T 20-Jul-2002 IPR002087 Anti-proliferative protein 335884.1.f2 AEC0A2BB26E70AC1 526 HMMSmart SM00099 btg1 59 166 7e-77 T 20-Jul-2002 IPR002087 Anti-proliferative protein 335884.1.f3 9D1D86D4577130FA 526 ProfileScan PS50099 PRO_RICH 27 76 13.052 T 20-Jul-2002 IPR000694 Proline-rich region __File_B.data END__ __ CDSs_Contig_File_1 START (entered on command line)__ X64146.1 -------------------------ATGCATCCCGCCCTGTACACCCGGGCCAGCATGAT 335884.1 CTGGCTGCCGGCCGCCGCCGCCTCCATGCATCCCGCCCTGTACACCCGGGCCAGCATGAT *********************************** X64146.1 ACGCGAGATCGCCGCGGCCGTGGCCTTCATCTCCAAGTTCCTGCGCACCAAGGGGCTGAT 335884.1 ACGCGAGATCGCCGCGGCCGTGGCCTTCATCTCCAAGTTCCTGCGCACCAAGGGGCTGAT ************************************************************ X64146.1 GAACGAACGGCAGTTACAGACCTTCAGCCAGAGCCTGCAGGAGCTGCTGGCAGAACATTA 335884.1 GAACGAACGGCAGTTACAGACCTTCAGCCAGAGCCTGCAGGAGCTGCTGGCAGAACATTA ************************************************************ X64146.1 TAAACACCACTGGTTCCCAGAAAAGCCATGCAAGGGATCAGGTTACCGATGTATCCGGAT 335884.1 TAAACACCACTGGTTCCCAGAAAAGCCATGCAAGGGATCAGGTTACCGATGTATCCGGAT ************************************************************ X64146.1 CAACCATAAAATGGATCCTCTCATTGGACAGGCAGCACAGCGGATTGGATTGAGCAGTCA 335884.1 CAACCATAAAATGGATCCTCTCATTGGACAGGCAGCACAGCGGATTGGATTGAGCAGTCA ************************************************************ X64146.1 GGAACTGTTCCAGCTTCTTCCGAGCGAACTCACTCTATGGGTTGACCCGTATGAAGTGTC 335884.1 GGAACTGTTCCAGCTTCTTCCGAGCGAACTCACTCTATGGGTTGACCCGTATGAAGTGTC ************************************************************ X64146.1 CTATCGTATTGGAGAGGATGGCTCAATCTGTGTGCTGTATGAAGCTGCACCAGCAGGAGG 335884.1 CTATCGTATTGGAGAGGATGGCTCAATCTGTGTGCTGTATGAAGCTGCACCAGCAGGAGG ************************************************************ X64146.1 TAGCCAAAATAACACCAACATGCAAATGGTAGACAGCAGAATAAGCTGTAAGGAGGAACT 335884.1 TAGCCAAAATAACACCAACATGCAAATGGTAGACAGCAGAATAAGCTGTAAGGAGGAACT ************************************************************ X64146.1 TCTCTTGGGCAGAACTAGCCCTTCCAAAAGCTACAATATGATGACTGTATCGGGTTAA-- 335884.1 TCTCTTGGGCAGAACTAGCCCTTCCAAAAGCTACAATATGATGACTGTATCGGGTTAAGA ********************************************************** X64146.1 ------------------------------------------------------------ 335884.1 TATAGTCAGTGGATGGATCACCTTAAAATGGATGGATAAGTTTGGTTTTTACTTTGGGTG __ CDSs_Contig_File_1 END__ __ CDSs_Contig_File_2 START (entered on command line)__ M63553.1 -----------------------------------ATGAAGGCTTTGGTGGCGGCGACGG 335565.2 GGCGGGCGCTGCGGCCGGGCTGCGATCGGCGGAGGATGAAGGCTTTGGTGGCGGCGACGG ************************* M63553.1 CGCTGGGGCCGGCGCTCCTACTGCTGCTGCCCGCCGCCTCGAGGGCCGACGAGCGCAAGA 335565.2 CGCTGGGCGCGGCGCTCCTACTGCTGCTGCCCGCCGCCTCGAGGGCCGACGAGCGCAAGA ******* *************************************************** M63553.1 AGGGCCCCAAGGTCACGGCCAAGGTGTTCTTCGACCTCCGCGTGGGCGAGGAGGATGCGG 335565.2 AGGGCCCCAAGGTCACGGCCAAGGTGTTCTTCGACCTCCGCGTGGGCGAGGAGGATGCGG ************************************************************ M63553.1 GCCGCGTCGTCATCGGGCTCTTCGGCAAAACGGTGCCCAAAACGGTGGAGAACTTCGTGG 335565.2 GCCGCGTCGTCATCGGGCTCTTCGGCAAAACGGTGCCCAAAACGGTGGAGAACTTCGTGG ************************************************************ M63553.1 CTTTGGCCACCGGGGAGAAAGGGTTCGGCTTCAAGGGCAGCAAGTTCCACCGCGTCATCA 335565.2 CTTTGGCCACCGGGGAGAAAGGGTTCGGCTTCAAGGGCAGCAAGTTCCACCGCGTCATCA ************************************************************ M63553.1 AGGACTTCATGATCCAGGGAGGGGACTTCACCCGCGGGGACGGCACCGGAGGGAAGAGCA 335565.2 AGGACTTCATGATCCAGGGAGGGGACTTCACCCGCGGGGACGGCACCGGAGGGAAGAGCA ************************************************************ M63553.1 TCTACGGAGACCGCTTCCCTGACGAGAACTTCAAGCTGAAGCACTACGGCCCCGGCTGGG 335565.2 TCTACGGAGACCGCTTCCCTGACGAGAACTTCAAGCTGAAGCACTACGGCCCCGGCTGGG ************************************************************ M63553.1 TGAGCATGGCCAACGCCGGCAAGGACACCAACGGCTCCCAGTTCTTCATCACCACGGTGA 335565.2 TGAGCATGGCCAACGCCGGCAAGGACACCAACGGCTCCCAGTTCTTCATCACCACGGTGA ************************************************************ M63553.1 AGACGGCGTGGCTGGATGGCAAGCACGTGGTGTTCGGCAAAGTGCTGGAGGGCATGGACG 335565.2 AGACGCCGTGGCTGGATGGCAAGCACGTGGTGTTCGGCAAAGTGCTGGAGGGCATGGACG ***** ****************************************************** M63553.1 TGGTGAGGAAGGTGGAGAACACCAAGACAGACAGCCGGGACAAACCCCTGAAGGACGTCA 335565.2 TGGTGAGGAAGGTGGAGAACACCAAGACAGACAGCCGGGACAAACCCCTGAAGGACGTCA ************************************************************ M63553.1 CCATTGCTGACTGCGGCACCATCGAGGTGGAGAAGCCCTTCG-CCATCGCCAAGGAGTGA 335565.2 CCATTGCTGACTGCGGCACCATCGAGGTGGAGAAGCCCTTCGGCCATCGCCAAGGAGTGA ****************************************** ***************** __ CDSs_Contig_File_2 END__ -- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]