On Tue, Dec 20, 2011 at 11:25 AM, Motaz SAAD <motaz.s...@gmail.com> wrote:

> I am a beginner in Perl and I have a segmentation fault in my code. The
> code runs perfectly until the third iteration, and then it produces a
> segmentation fault in the inner while loop in the 3rd iteration of
> the outer while loop.
> Would you please help me with hints?
> Your help will be appreciated.
>

If it were me, I would add "print" statements inside the loops (see the
unquoted lines below). When it segfaults, you can see which two "print"s
the error falls between. Then move those two closer and closer together
until you find the line causing the error. That narrows down the scope. It
looks like you started down that path already...


>
> #!/usr/bin/perl -w
>
> # Code : Dake
> use strict;
> use Parse::MediaWikiDump;
> use utf8;
> use XML::Parser;
> use XML::Writer;
> use IO::File;
>
> binmode STDOUT, ":utf8";
>
>
>
> #my $file = shift(@ARGV) or die "must specify a Mediawiki dump file";
> my $enWiki = 'enwiki-latest-pages-articles.xml';
> my $frWiki = 'frwiki-20111123-pages-articles.xml';
> my $arWiki = 'arwiki-20111118-pages-articles.xml';
>
> my $enPages = Parse::MediaWikiDump::Pages->new($enWiki);
> my $frPages = Parse::MediaWikiDump::Pages->new($frWiki);
> my $arPages = Parse::MediaWikiDump::Pages->new($arWiki);
>
>
> my $output = new IO::File("enfrar.xml", ">:utf8");
>
> my $writer = new XML::Writer(OUTPUT => $output, DATA_MODE =>
> 1,DATA_INDENT => 2);
> $writer->xmlDecl("UTF-8");
>
> my $enPage;
> #my $frPage;
> #my $arPage;
>
> my $enId;
> #my $frId;
> #my $arId;
>
> my $enTitle;
> #my $arTitle;
> #my $frTitle;
>
> my $enText;
> #my $frText;
> #my $arText;
>
>
>
> my $EnCount = 0;
> my $EnArFrCount = 0;
> my $EnArCount = 0;
> my $EnFrCount = 0;
> my $testCount = 1;
>
>
> #my $category;
>
> $writer->startTag("en-fr-ar-wiki");
>
> while(defined($enPage = $enPages->next)) {#for each english article
>
print "One\n";

>        #main namespace only
>        next unless $enPage->namespace eq '';
>
>        $enId = $enPage->id;
>        $enTitle = $enPage->title;
>        #$category = $page->category;
>        $enText = $enPage->text;
>
>        $EnCount++;
>
>
>        if($enTitle eq "A") {next;}
>
print "Two\n";

>
>
>
>
>
>
>
>        #if (($$text =~ /\[\[en:/i) && ($$text =~ /\[\[ar:/i))
>
>        if (  ($$enText =~ m/\[\[fr:/i)  && ($$enText =~ m/\[\[ar:/i)  ) {#
> if the english article contains links for arabic and french articles
>                print "\nlinks found for ar & fr in en article entitled:
> ",
> $enTitle , "\n";
>
>                my $frPage;
>                my $arPage;
>
>                my $frId = "id not found";
>                my $arId = "id not found";
>
>                my $arTitle;
>                my $frTitle;
>
>                my $frText;
>                my $arText;
>                $frText = "text not found";
>                $arText = "text not found";
>
>
>                $EnArFrCount++;
>                $$enText =~ /\[\[fr:(.*?)\]/  ;
>                $frTitle = $1;
>                $$enText =~ /\[\[ar:(.*?)\]/  ;
>                $arTitle = $1;
>
>
>                #enforce the MediaWiki case rules
>                #$frTitle = case_fixer($frTitle);
>                print "searching for fr text in fr wiki\n";
>
>                while(defined($frPage = $frPages->next)) {#find the French article id and text; search by the article's title
>                #main namespace only
>                next unless $frPage->namespace eq '';
>
print "Three\n";

>                        if ($frPage->title eq $frTitle) {
>                                my $frTextRef = $frPage->text;
>                                $frText = $$frTextRef;
>                                $frId = $frPage->id;
>                                print "fr text found\n";
>                                last;
>                        }
>                }#end while for extracting french article id and text
>
print "Four\n";

>
> ########################################
>
>                #enforce the MediaWiki case rules
>                #$arTitle = case_fixer($arTitle);
>                print "searching for ar text in ar wiki\n";
>
>                while(defined($arPage = $arPages->next)) {#find the Arabic article id and text; search by the article's title
>                        #main namespace only
>                        next unless $arPage->namespace eq '';
>
print "Five\n";

>                        if ($arPage->title eq $arTitle) {
>                                my $arTextRef = $arPage->text;
>                                $arText = $$arTextRef;
>                                $arId = $arPage->id;
>                                print "ar text found\n";
>                                last;
>                        }
>                }#end while for extracting arabic article id and text
>
print "Six\n";

>
>
>
>
>
>                print $enId;
>                print ",";
>                print $enTitle;
>                print ",";
>                print $frTitle;
>                print ",";
>                print $arTitle;
>                print"\n";
>
>                #SQL
>                $writer->startTag("page");
> ########################################
>                $writer->startTag("en");
>
>                $writer->startTag("id");
>                $writer->characters($enId);
>                $writer->endTag();
>
>                $writer->startTag("title");
>                $writer->characters($enTitle);
>                $writer->endTag();
>
>                $writer->startTag("text");
>                $writer->characters($$enText);
>                $writer->endTag();
>
>                $writer->endTag();
>
>                print "finish writing english artile's id, title, and text
> to xml
> file\n";
> ########################################
>                $writer->startTag("fr");
>
>                $writer->startTag("id");
>                $writer->characters($frId);
>                $writer->endTag();
>
>                $writer->startTag("title");
>                $writer->characters($frTitle);
>                $writer->endTag();
>
>                $writer->startTag("text");
>                $writer->characters($frText);
>                $writer->endTag();
>
>                $writer->endTag();
>                print "finish writing french artile's id, title, and text
> to xml file
> \n";
> ########################################
>                $writer->startTag("ar");
>
>                $writer->startTag("id");
>                $writer->characters($arId);
>                $writer->endTag();
>
>                $writer->startTag("title");
>                $writer->characters($arTitle);
>                $writer->endTag();
>
>                $writer->startTag("text");
>                $writer->characters($arText);
>                $writer->endTag();
>
>                $writer->endTag();
>                print "finish writing arabic artile's id, title, and text
> to xml file
> \n";
> ########################################
>
>
>                $writer->endTag();
>                print "closing page tag for the article num: ", $testCount,
> " \n";
>                $testCount++;
>
> #               print $testCount; print "\t";
> #               if ($testCount == 10){
> #                       print "\n";
> #                       last;
> #               }
>
>
>        }#end if for en ar fr
>
>        if (($$enText =~ /\[\[fr:/i)){
>
>                #print $page->title, "\n";
>                $EnFrCount ++;
>        }
>
>        if (($$enText =~ /\[\[ar:/i)){
>                #print $page->title, "\n";
>                $EnArCount ++;
>        }
>        #print "working !!!!\n";
>
> }# end for while loop (for each english article)
>
>
> $writer->endTag();
> $writer->end();
> $output->close();
>
>
> print "\n\n";
> print "English : ";
> print $EnCount;
>
> print "\n\n";
> print "English/Arabic/French : ";
> print $EnArFrCount;
>
> print "\n\n";
> print "English/Arabic : ";
> print $EnArCount;
>
> print "\n\n";
> print "English/French : ";
> print $EnFrCount;
>
>
>
> #removes any case sensitivity from the very first letter of the title
> #but not from the optional namespace name
> sub case_fixer {
>  my $title = shift;
>
>  #check for namespace
>  if ($title =~ /^(.+?):(.+)/) {
>    $title = $1 . ':' . ucfirst($2);
>  } else {
>    $title = ucfirst($title);
>  }
>
>  return $title;
> }
>

-- 
Robert Wohlfarth

Reply via email to