> -----Original Message----- > From: news [mailto:n...@ger.gmane.org] On Behalf Of Richard Loveland > Sent: Friday, May 08, 2009 11:59 > To: beginners@perl.org > Subject: Re: Perl code for comparing two files > > -----BEGIN PGP SIGNED MESSAGE----- > Hash: SHA1 > > Mr. Adhikary, > > The following will take any number of files as arguments, in > the format > you described (I even tested it! :-)). It goes through each line of > those files, stuffing (the relevant part of) each line in a > 'seen' hash > (more on that, and other, hash techniques here if you're interested: > http://www.perl.com/pub/a/2006/11/02/all-about-hashes.html). > > The code below does not keep track of line numbers as you > requested, but > I think the hash technique used here could help you as you approach a > solution to your particular problem. > > > #!/usr/bin/perl > > use strict; > use warnings; > use File::Slurp; # This is where 'read_file' lives > > my %seen; > > for my $arg ( @ARGV ) { > my @lines = read_file( $arg ); > for my $line ( @lines ) { > chomp $line; > my @elems = split / /, $line; > my $value = $elems[1]; > $seen{$value}++; > } > } > > for my $k ( keys %seen ) { > print $k, "\n" if $seen{$k} > 1; > } > This is similar to the above, but it does not need File::Slurp and uses a hash combined with an array: element [0] is the count of seen items, and each element with an index greater than zero holds the line number, where the index identifies the file it came from. The script prints the result with Data::Dumper. I ran it with the files you provided.
#!/usr/bin/perl use strict; use warnings; use Data::Dumper; my %seen; my $MyLineNbr = 1; my %MFN = (); my $MyFilenames = \%MFN; my $MyFileCnt = 1; my $MyCurrFile = q[]; while ( <> ) { if ( $ARGV ne $MyCurrFile ) { printf "Filename: %s (%d)\n", $ARGV, $MyFileCnt; $MyCurrFile = $ARGV; $MyFilenames->{$MyCurrFile} = $MyFileCnt++; $MyLineNbr = 0; } chomp; $MyLineNbr++; next if ( /^\s*$/ ); my @elems = split (/ /, $_); my $value = $elems[1]; $seen{$value}[0]++; $seen{$value}[$MyFilenames->{$MyCurrFile}] = $MyLineNbr; } print Dumper(\%seen); > > Regards, > Rich Loveland > > > Anirban Adhikary wrote: > > Hi List > > I am writing a perl code which will takes 2 more files as > argument. Then It > > will check the the values of each line of a file with > respect with another > > file. If some value matches then it will write the value > along with line > > number to another ( say outputfile) file. > > > > The source files are as follow > > > > Contents of abc.txt > > 1 2325278241,P0 > > 2 2296250723,MH > > 3 2296250724,MH > > 4 2325277178,P0 > > 5 7067023316,WL > > 6 7067023329,WL > > 7 2296250759,MH > > 8 7067023453,WL > > 9 7067023455,WL > > 10 5000055413,EA05 > > ####################################################### > > Contents of xyz.txt > > 1 7067023453,WL > > 2 31-DEC-27,2O,7038590671 > > 3 31-DEC-27,2O,7038596464 > > 4 31-DEC-27,2O,7038596482 > > 5 2296250724,MH > > 6 31-DEC-27,2O,7038597632 > > 7 31-DEC-27,2O,7038589511 > > 8 31-DEC-11,2O,7038590671 > > 9 7067023455,WL > > 10 31-DEC-27,2O,7038555744 > > ############################################################### > > Contents of pqr.txt > > 1 2325278241,P0 > > 2 7067023316,WL > > 3 7067023455,WL > > 4 2296250724,MH > > > > > > > > > > ######################################################## > > > > For this requirement I have written the following code > which works fine for > > 2 input files > > > > use strict; > > use warnings; > > > > use Benchmark; > > > > if(@ARGV < 2) { > > print "Please enter atleast two 
or more .orig file names \n"; > > exit 0; > > } > > my @file_names = @ARGV; > > chomp(@file_names); > > my @files_to_process; > > > > for(@file_names) { > > if( -s $_){ > > print "File $_ exists\n"; > > push(@files_to_process,$_); > > } > > elsif( -e $_) { > > print "File $_ exists but it has zero byte size\n"; > > } > > else { > > print "File $_ does not exists \n"; > > } > > } > > > > my $count = @files_to_process; > > if( $count < 2 ) { > > print "Atleast 2 .orig files are required to continue this > > program\n"; > > exit 0; > > } > > > > my $output_file = "outputfile"; > > my $value = 0; > > my $start_time = new Benchmark; > > > > > > if( $count >= 2 ) { > > while ($count) { > > my > ($files_after_processing_pointer,$return_val) = > > create_itermediate_file (\...@files_to_process,$value); > > my @files_after_processing = > > @$files_after_processing_pointer; > > $count = @files_after_processing; > > $value = $return_val; > > @files_to_process = @files_after_processing; > > > > } > > > > my $end_time = new Benchmark; > > my $difference = timediff($end_time, $start_time); > > print "It took ", timestr($difference), " to execute > the program\n"; > > > > } > > > > > > > > > > sub create_itermediate_file { > > my $file_pointer = $_[0]; > > my $counter = $_[1]; > > my @file_content = @$file_pointer; > > > > if($counter == 0) { > > my($first_file,$second_file) = > splice > > (@file_content, 0, 2); > > open my $orig_first, "<", $first_file > > or die "could not open > $first_file: $!"; > > open my $orig_second, "<", $second_file > > or die "could not open > $second_file: > > $!"; > > open my $output_fh, ">", $output_file > > or die "could not open > $output_file: > > $!"; > > > > my %content_first; > > while (my $line = > <$orig_first>) { > > chomp $line; > > if ($line) { > > > > my($line_num,$value) = split(" ",$line); > > > > $content_first{$value} = $line_num; > > } > > } > > > > my %content_second; > > while (my $line = > <$orig_second>) { > > chomp $line; > > 
if ($line) { > > > > my($line_num,$value) = split(" ",$line); > > > > $content_second{$value} = $line_num; > > } > > } > > > > foreach my $key (sort keys > > %content_second) { > > if (exists > > $content_first{$key} ) { > > > print $output_fh > > "$content_second{$key} $key" ,"\n"; > > } > > } > > $counter += 1; > > return (\...@file_content,$counter); > > } > > if ($counter != 0) { > > my $file_pointer = $_[0]; > > my $counter = $_[1]; > > my @file_content_mod = @$file_pointer; > > my($file_to_process) = > shift(@file_content_mod); > > > > > > open my $orig_file, "<", > $file_to_process > > or die "could not open > $file_to_process: > > $!"; > > open my $output_fh, "<", $output_file > > or die "could not open > $output_file: > > $!"; > > open my $output_fh_mod, ">", > $output_file."_mod" > > or die "could not open", > > $output_file."_mod : $!"; > > > > my %content_file_to_process; > > while (my $line =<$orig_file>) { > > chomp $line; > > if ($line) { > > > > my($line_num,$value) = split(" ",$line); > > > > $content_file_to_process{$value} = $line_num; > > } > > } > > > > my %content_output_file; > > while (my $line =<$output_fh>) { > > chomp $line; > > if ($line) { > > > > my($line_num,$value) = split(" ",$line); > > > > $content_output_file{$value} = $line_num; > > } > > } > > > > foreach my $key (sort keys > > %content_output_file) { > > if (exists > > $content_file_to_process{$key} ) { > > print > > $output_fh_mod "$content_file_to_process{$key} $key" ,"\n"; > > } > > } > > $counter += 1; > > return (\...@file_content_mod,$counter); > > > > } > > } > > > > > > But when I am entering 3 file names as argument this is not > working.It is > > working properly though I am using another file > $output_file_mod> It was > > only written the similar lines between two compared files > where it should be > > written the common lines of two compared files as well as > lines which were > > not present in the last file but present in the > $output_file. 
I was trying > > to use open my $output_fh, "+>>", $output_file or die > "could not open > > $output_file: $!"; syntax but it was not working.) > > > > Thanks & Regards in advance > > Anirban Adhikary. > > > > -----BEGIN PGP SIGNATURE----- > Version: GnuPG v1.4.6 (GNU/Linux) > Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org > > iD8DBQFKBHLI4EG8v4hpG/ERAvcXAJ9YH9vEnpcgtvPPAqlJlpy9N5lXZQCfQlJM > zmPRzzSRHCzpi/EwAzDZM8E= > =8YbD > -----END PGP SIGNATURE----- > > > -- > To unsubscribe, e-mail: beginners-unsubscr...@perl.org > For additional commands, e-mail: beginners-h...@perl.org > http://learn.perl.org/ > > > -- To unsubscribe, e-mail: beginners-unsubscr...@perl.org For additional commands, e-mail: beginners-h...@perl.org http://learn.perl.org/