Hello, I am a beginner in perl and I have segmentation fault in my code. the code run perfectly for the until the third iteration and it produce segmentation fault in the inner while loop in the 3rd iteration of the outer while loop. Would you please help me with hints. Your help will be appreciated
-- Motaz SAAD #!/usr/bin/perl -w # Code : Dake use strict; use Parse::MediaWikiDump; use utf8; use XML::Parser; use XML::Writer; use IO::File; binmode STDOUT, ":utf8"; #my $file = shift(@ARGV) or die "must specify a Mediawiki dump file"; my $enWiki = 'enwiki-latest-pages-articles.xml'; my $frWiki = 'frwiki-20111123-pages-articles.xml'; my $arWiki = 'arwiki-20111118-pages-articles.xml'; my $enPages = Parse::MediaWikiDump::Pages->new($enWiki); my $frPages = Parse::MediaWikiDump::Pages->new($frWiki); my $arPages = Parse::MediaWikiDump::Pages->new($arWiki); my $output = new IO::File("enfrar.xml", ">:utf8"); my $writer = new XML::Writer(OUTPUT => $output, DATA_MODE => 1,DATA_INDENT => 2); $writer->xmlDecl("UTF-8"); my $enPage; #my $frPage; #my $arPage; my $enId; #my $frId; #my $arId; my $enTitle; #my $arTitle; #my $frTitle; my $enText; #my $frText; #my $arText; my $EnCount = 0; my $EnArFrCount = 0; my $EnArCount = 0; my $EnFrCount = 0; my $testCount = 1; #my $category; $writer->startTag("en-fr-ar-wiki"); while(defined($enPage = $enPages->next)) {#for each english article #main namespace only next unless $enPage->namespace eq ''; $enId = $enPage->id; $enTitle = $enPage->title; #$category = $page->category; $enText = $enPage->text; $EnCount++; if($enTitle eq "A") {next;} #if (($$text =~ /\[\[en:/i) && ($$text =~ /\[\[ar:/i)) if ( ($$enText =~ m/\[\[fr:/i) && ($$enText =~ m/\[\[ar:/i) ) {# if the english article contains links for arabic and french articles print "\nlinks found for ar & fr in en article entitled: ", $enTitle , "\n"; my $frPage; my $arPage; my $frId = "id not found"; my $arId = "id not found"; my $arTitle; my $frTitle; my $frText; my $arText; $frText = "text not found"; $arText = "text not found"; $EnArFrCount++; $$enText =~ /\[\[fr:(.*?)\]/ ; $frTitle = $1; $$enText =~ /\[\[ar:(.*?)\]/ ; $arTitle = $1; #enforce the MediaWiki case rules #$frTitle = case_fixer($frTitle); print "searching for fr text in fr wiki\n"; while(defined($frPage = $frPages->next)) {#find the the french article id and text,,, search by article's title #main namespace only next unless $frPage->namespace eq ''; if ($frPage->title eq $frTitle) { my $frTextRef = $frPage->text; $frText = $$frTextRef; $frId = $frPage->id; print "fr text found\n"; last; } }#end while for extracting french article id and text ######################################## #enforce the MediaWiki case rules #$arTitle = case_fixer($arTitle); print "searching for ar text in ar wiki\n"; while(defined($arPage = $arPages->next)) {#find the the arabic article id and text,,, search by article's title #main namespace only next unless $arPage->namespace eq ''; if ($arPage->title eq $arTitle) { my $arTextRef = $arPage->text; $arText = $$arTextRef; $arId = $arPage->id; print "ar text found\n"; last; } }#end while for extracting arabic article id and text print $enId; print ","; print $enTitle; print ","; print $frTitle; print ","; print $arTitle; print"\n"; #SQL $writer->startTag("page"); ######################################## $writer->startTag("en"); $writer->startTag("id"); $writer->characters($enId); $writer->endTag(); $writer->startTag("title"); $writer->characters($enTitle); $writer->endTag(); $writer->startTag("text"); $writer->characters($$enText); $writer->endTag(); $writer->endTag(); print "finish writing english artile's id, title, and text to xml file\n"; ######################################## $writer->startTag("fr"); $writer->startTag("id"); $writer->characters($frId); $writer->endTag(); $writer->startTag("title"); $writer->characters($frTitle); $writer->endTag(); $writer->startTag("text"); $writer->characters($frText); $writer->endTag(); $writer->endTag(); print "finish writing french artile's id, title, and text to xml file \n"; ######################################## $writer->startTag("ar"); $writer->startTag("id"); $writer->characters($arId); $writer->endTag(); $writer->startTag("title"); $writer->characters($arTitle); $writer->endTag(); $writer->startTag("text"); $writer->characters($arText); $writer->endTag(); $writer->endTag(); print "finish writing arabic artile's id, title, and text to xml file \n"; ######################################## $writer->endTag(); print "closing page tag for the article num: ", $testCount, " \n"; $testCount++; # print $testCount; print "\t"; # if ($testCount == 10){ # print "\n"; # last; # } }#end if for en ar fr if (($$enText =~ /\[\[fr:/i)){ #print $page->title, "\n"; $EnFrCount ++; } if (($$enText =~ /\[\[ar:/i)){ #print $page->title, "\n"; $EnArCount ++; } #print "working !!!!\n"; }# end for while loop (for each english article) $writer->endTag(); $writer->end(); $output->close(); print "\n\n"; print "English : "; print $EnCount; print "\n\n"; print "English/Arabic/French : "; print $EnArFrCount; print "\n\n"; print "English/Arabic : "; print $EnArCount; print "\n\n"; print "English/French : "; print $EnFrCount; #removes any case sensativity from the very first letter of the title #but not from the optional namespace name sub case_fixer { my $title = shift; #check for namespace if ($title =~ /^(.+?):(.+)/) { $title = $1 . ':' . ucfirst($2); } else { $title = ucfirst($title); } return $title; } -- To unsubscribe, e-mail: beginners-unsubscr...@perl.org For additional commands, e-mail: beginners-h...@perl.org http://learn.perl.org/