I am trying to merge two file based on a SNP_A-###### list in each file. For
some reason, the regular expression in the if loop does not work and I can
not match the key values generated from hash to the string from the input
file.  Could anybody help me detect the problem?

Thanks,

AG


#!usr/local/bin/perl

use strict;
use warnings;
open (DATA, "C:/perl/work/A172cn.txt") or die "Can not open file $!\n";
open (DATA2, "C:/perl/work/a127_gdas.txt") or die "Can not open file $! \n";

while(<DATA>)
{
my $mykey;
my $myvalue;
my %Hash;
my %mainhash = ();

next unless /^SNP/;
%Hash=getkeyvalue($mykey,$myvalue);

foreach $mykey (keys(%Hash)) 
        {

        my $inline ;

        while($inline = <DATA2>)
                {

                next unless /SNP/;
                #print "mykey $mykey my value: $Hash{$mykey} \n";
                if($inline =~ m/($mykey)/) 
                        {
                        print "$mykey $Hash{$mykey} $inline \n";
                        
                        }
                }
        }
}

sub getkeyvalue
{

my @line = ();
my $value;
my $col;


@line = split('\t', $_);

$col = $line[0];

chomp $col;
$value =join("\t", $line[1], $line[2]);
return ($col, $value);
}


#__DATA__
#SNP_A-1509443  3       3776202
#SNP_A_1518557  3       3776202
#SNP_A_1514538  5       5350951
#SNP_A_1516403  1       5483872
#BFFX-BioB-M_at  P P P P P A P
#[snip]

#__DATA2__
#SNP ID dbSNP RS ID     Chromosome      Physical Position       TSC ID
A172_Call       A172_Call Zone
#7085   SNP_A-1509443   rs1393064       1       2882121 TSC0565952      AA
0.02861
#4900   SNP_A-1518557   rs966321        1       3985402 TSC0273278      AA
0.152388
#8258   SNP_A-1517286   rs1599169       1       4804829 TSC0694296      BB
0.538696
#10947  SNP_A-1516024   rs580309        1       4982250 TSC1478148      AA
0.569713
#7794   SNP_A-1514538   rs1414379       1       5468765 TSC0609730      AA
0.299872
#9130   SNP_A-1516403   rs1890191       1       5596686 TSC0913001      AA
0.221319
#7214   SNP_A-1518687   rs1396904       1       6605831 TSC0574502      BB
0.040226
#526    SNP_A-1509959   rs950493        1       6654350 TSC0042354      BB
0.123611
#4345   SNP_A-1515791   rs845263        1       7133863 TSC0218512
NoCall  0.814947
#7914   SNP_A-1512212   rs1418490       1       7134783 TSC0617931      BB
0.077556
#4470   SNP_A-1513560   rs705695        1       7145191 TSC0246331      AA
0.700697
#8386   SNP_A-1519671   rs228651        1       7620645 TSC0730553      AA
0.09444
#4854   SNP_A-1515942   rs966134        1       8082754 TSC0272985      BB
0.212891
#637    SNP_A-1509129   rs205474        1       10542407        TSC0043572
BB      0.122514
#9481   SNP_A-1512107   rs1281034       1       10706737        TSC0984465
NoCall  10
#432    SNP_A-1514390   rs718206        1       11004020        TSC0041639
BB      0.66461
#10471  SNP_A-1518041   rs2206321       1       12221853        TSC1262794
AA      0.058009
#[snip]

here's another way...

use strict;
open DATA, 'data'
   or die 'Open DATA failed';
my %hash;
print "Build hash\n";
while (<DATA>){
   next unless /^SNP/;
   chomp;
   my ($snp_key,$value) = split /\t/;
   $snp_key =~ s/ +$//g; # if appears you may have whitespace at the end
   print "Key <$snp_key> exist more than once!\n"
   if exists($hash{$snp_key});
   print "<$snp_key> = $value\n"; # use for testing
   $hash{$snp_key}= $value;
   }
close DATA;
open DATA2, 'data2'
   or die 'Open DATA2 failed';
print "Look for same key\n";
while (my $data=<DATA2>) {
   chomp($data);
   (undef, my $snp_key) = split /\t/,$data;
   print "<$snp_key>\n"; # use for testing
   next unless $snp_key =~ /^SNP/;
   print join(',','found = ',$snp_key,$hash{$snp_key},$data), "\n"
      if exists($hash{$snp_key});
   }
close DATA2;

Hope this gives you some other ideas

jwm

-- 
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]
<http://learn.perl.org/> <http://learn.perl.org/first-response>


Reply via email to