> -----Original Message----- > From: Anirban Adhikary [mailto:anirban.adhik...@gmail.com] > Sent: Monday, May 04, 2009 06:40 > To: beginners@perl.org > Subject: Perl code for comparing two files > > Hi List > I am writing a perl code which will takes 2 more files as > argument. Then It > will check the the values of each line of a file with respect > with another > file. If some value matches then it will write the value > along with line > number to another ( say outputfile) file. > > The source files are as follow > > Contents of abc.txt > 1 2325278241,P0 > 2 2296250723,MH > 3 2296250724,MH > 4 2325277178,P0 > 5 7067023316,WL > 6 7067023329,WL > 7 2296250759,MH > 8 7067023453,WL > 9 7067023455,WL > 10 5000055413,EA05 > ####################################################### > Contents of xyz.txt > 1 7067023453,WL > 2 31-DEC-27,2O,7038590671 > 3 31-DEC-27,2O,7038596464 > 4 31-DEC-27,2O,7038596482 > 5 2296250724,MH > 6 31-DEC-27,2O,7038597632 > 7 31-DEC-27,2O,7038589511 > 8 31-DEC-11,2O,7038590671 > 9 7067023455,WL > 10 31-DEC-27,2O,7038555744 > ############################################################### > Contents of pqr.txt > 1 2325278241,P0 > 2 7067023316,WL > 3 7067023455,WL > 4 2296250724,MH > > > > Here is a way where a 'seen' hash has two array elements: [0] - count, [1]: file number and line number for each seen item. Code starts on next line: use strict; use warnings;
use Data::Dumper; my %seen; my $MyLineNbr = 1; my %MFN = (); my $MyFilenames = \%MFN; my $MyFileCnt = 1; my $MyCurrFile = q[]; my $MyActIdx = 1; while ( <> ) { if ( $ARGV ne $MyCurrFile ) { printf "Filename: %s\n", $ARGV; $MyCurrFile = $ARGV; $MyFilenames->{$MyCurrFile} = $MyFileCnt++; $MyLineNbr = 0; } chomp; $MyLineNbr++; next if ( /^\s*$/ ); my @elems = split (/ /, $_); my $value = $elems[1]; $seen{$value}[0]++; $seen{$value}[$MyActIdx] .= $MyFilenames->{$MyCurrFile} . q[;] . $MyLineNbr. q[^]; } print Dumper(\%seen); ^--- code ends here I leave it to you to get the output, but this should give you what you need to work with. If you have any questions and/or problems, please let me know. Thanks. Wags ;) David R. Wagner Senior Programmer Analyst FedEx Freight 1.719.484.2097 TEL 1.719.484.2419 FAX 1.408.623.5963 Cell http://fedex.com/us > ######################################################## > > For this requirement I have written the following code which > works fine for > 2 input files > > use strict; > use warnings; > > use Benchmark; > > if(@ARGV < 2) { > print "Please enter atleast two or more .orig file names \n"; > exit 0; > } > my @file_names = @ARGV; > chomp(@file_names); > my @files_to_process; > > for(@file_names) { > if( -s $_){ > print "File $_ exists\n"; > push(@files_to_process,$_); > } > elsif( -e $_) { > print "File $_ exists but it has zero byte size\n"; > } > else { > print "File $_ does not exists \n"; > } > } > > my $count = @files_to_process; > if( $count < 2 ) { > print "Atleast 2 .orig files are required to continue this > program\n"; > exit 0; > } > > my $output_file = "outputfile"; > my $value = 0; > my $start_time = new Benchmark; > > > if( $count >= 2 ) { > while ($count) { > my > ($files_after_processing_pointer,$return_val) = > create_itermediate_file (\@files_to_process,$value); > my @files_after_processing = > @$files_after_processing_pointer; > $count = @files_after_processing; > $value = $return_val; > @files_to_process = 
@files_after_processing; > > } > > my $end_time = new Benchmark; > my $difference = timediff($end_time, $start_time); > print "It took ", timestr($difference), " to execute the > program\n"; > > } > > > > > sub create_itermediate_file { > my $file_pointer = $_[0]; > my $counter = $_[1]; > my @file_content = @$file_pointer; > > if($counter == 0) { > my($first_file,$second_file) = splice > (@file_content, 0, 2); > open my $orig_first, "<", $first_file > or die "could not open > $first_file: $!"; > open my $orig_second, "<", $second_file > or die "could not open > $second_file: > $!"; > open my $output_fh, ">", $output_file > or die "could not open > $output_file: > $!"; > > my %content_first; > while (my $line = > <$orig_first>) { > chomp $line; > if ($line) { > > my($line_num,$value) = split(" ",$line); > > $content_first{$value} = $line_num; > } > } > > my %content_second; > while (my $line = > <$orig_second>) { > chomp $line; > if ($line) { > > my($line_num,$value) = split(" ",$line); > > $content_second{$value} = $line_num; > } > } > > foreach my $key (sort keys > %content_second) { > if (exists > $content_first{$key} ) { > > print $output_fh > "$content_second{$key} $key" ,"\n"; > } > } > $counter += 1; > return (\@file_content,$counter); > } > if ($counter != 0) { > my $file_pointer = $_[0]; > my $counter = $_[1]; > my @file_content_mod = @$file_pointer; > my($file_to_process) = > shift(@file_content_mod); > > > open my $orig_file, "<", $file_to_process > or die "could not open > $file_to_process: > $!"; > open my $output_fh, "<", $output_file > or die "could not open > $output_file: > $!"; > open my $output_fh_mod, ">", > $output_file."_mod" > or die "could not open", > $output_file."_mod : $!"; > > my %content_file_to_process; > while (my $line =<$orig_file>) { > chomp $line; > if ($line) { > > my($line_num,$value) = split(" ",$line); > > $content_file_to_process{$value} = $line_num; > } > } > > my %content_output_file; > while (my $line =<$output_fh>) { > 
chomp $line; > if ($line) { > > my($line_num,$value) = split(" ",$line); > > $content_output_file{$value} = $line_num; > } > } > > foreach my $key (sort keys > %content_output_file) { > if (exists > $content_file_to_process{$key} ) { > print > $output_fh_mod "$content_file_to_process{$key} $key" ,"\n"; > } > } > $counter += 1; > return (\@file_content_mod,$counter); > > } > } > > > But when I am entering 3 file names as argument this is not > working. It is > working properly though I am using another file > $output_file_mod> It was > only written the similar lines between two compared files > where it should be > written the common lines of two compared files as well as > lines which were > not present in the last file but present in the $output_file. > I was trying > to use open my $output_fh, "+>>", $output_file or die "could not open > $output_file: $!"; syntax but it was not working.) > > Thanks & Regards in advance > Anirban Adhikary. > -- To unsubscribe, e-mail: beginners-unsubscr...@perl.org For additional commands, e-mail: beginners-h...@perl.org http://learn.perl.org/