#!/usr/bin/perl
# Contribution to Perl 6 Week - 1: A website for proto
#
# Author: Juan José 'Peco' San Martín <jsanmartin@gmail.com>
# Description: Basic script that downloads $source_list, and for each project extracts
# the one-line project description from the respective github pages.
# Date: 5-May-2010

use strict;
use warnings;
use LWP::Simple;
use CGI; # just used to return a webpage

# Source list: URL of the proto project list. The script downloads this file
# and scans it line by line for project names and their owners.
my $source_list= "http://github.com/masak/proto/raw/master/projects.list";

# getdesc($url)
# Helper sub to get a project's one-line description by parsing its front page.
#
# Parameters:
#   $url - address of the project front page to download.
# Returns:
#   The description text, or the empty string when the page cannot be
#   downloaded or does not contain the description marker.
sub getdesc
{
	my ($url)=@_;

	# get() yields undef when the download fails; bail out early rather than
	# running substitutions on an undefined value.
	my $web=get($url);
	return '' unless defined $web;

	# Drop everything up to (and including) the last description marker.
	# Without the marker there is no description to extract, so do not hand
	# the whole page back to the caller.
	$web=~s/.*repository_description_edit">//s or return '';

	$web=~s/<span id="read_more".*\Z//s;	# remove everything after
	$web=~s/\A.*<p>//s;			# remove leading markup up to the last <p>
	$web=~tr/\n\r\t//d;			# drop line breaks and tabs
	$web=~s/[^[:ascii:]]+//g;		# strip non-ASCII characters
	$web=~s/\A\s+|\s+\z//g;			# trim surrounding whitespace

	return $web;
}

# Let's go!

# Download the source list. get() returns undef on failure, so fall back to an
# empty list instead of splitting an undefined value. Accept CRLF line endings.
my $list_text=get($source_list);
my @list=defined $list_text ? split(/\r?\n/,$list_text) : ();

# A project is only printed when a blank line follows its record, and split()
# drops trailing empty fields. Append a blank sentinel line so the last
# project of the list is printed too.
push @list,'';

my $project='';
my $owner='';

# Minimal structure to return a webpage
my $cgi=CGI->new;
print $cgi->header;
# The page title must live in a <title> element; bare text in <head> is invalid.
print "<html><head><title>Contribution to Perl 6 Week - 1: A website for proto</title></head>\n";
print "<body><div>\n";
print "<br>";
print "Could not download the project list.<br><br>" unless defined $list_text;
# Compose the output
foreach my $line(@list)
{
	# A "name:" line starts a project record. Repository names may contain
	# dashes and dots in addition to word characters.
	if($line=~/\A([\w.-]+):\s*\z/){$project=$1;}
	# An "owner: name" line gives the owner of the current project.
	elsif($line=~/\A\s*owner:\s*(.*?)\s*\z/){$owner=$1;}
	# A blank line ends the record: fetch the description and print the entry.
	elsif($line=~/\A\s*\z/ && $owner ne '' && $project ne '')
	{
		my $url='http://github.com/'.$owner.'/'.$project;

		# Owner and project come from a downloaded file, so escape them
		# before placing them in markup.
		# NOTE(review): the description is an HTML fragment taken from the
		# project page and is printed as-is, as before.
		my $safe_url=$cgi->escapeHTML($url);
		my $safe_project=$cgi->escapeHTML($project);
		my $safe_owner=$cgi->escapeHTML($owner);

		print "Project <a href=\"$safe_url\">$safe_project</a> (by $safe_owner): ".getdesc($url)."<br><br>";

		$owner=$project='';
	}
}
print "</div></body></html>";
