#!/usr/bin/perl -Tw
use warnings;
use strict;

$|++;
use CGI;
use CGI qw(:standard);
use CGI::Carp qw/fatalsToBrowser/;

# We use the interpreter-threads ("ithreads") API provided by the threads
# module. This requires Perl 5.8 or later, compiled with thread support.
use threads;
use threads::shared;
use Encode;
use HTML::Entities;
use HTML::Parser;
use LWP::UserAgent;
use URI;
use URI::Escape;
use lib 'lib';
use utf8;
use RemoteProcess;
use Subprocess;


# Moses decoder endpoints: sixteen servers on this host, ports 9001 .. 9016.
# One worker is started per address.
my @MOSES_ADDRESSES = map { sprintf 'localhost:90%02d', $_ } 1 .. 16;

# Language codes handed to the helper scripts through their -l switch.
my $INPUT_LANG  = 'hi';
my $OUTPUT_LANG = 'pu';

# Command lines (program followed by its arguments) of the helper scripts
# that run as subprocesses around the decoder.
my @TOKENIZER_CMD    = ('./bin/tokenizer.perl', '-l', $INPUT_LANG);
my @DETOKENIZER_CMD  = ('./bin/detokenizer.perl', '-l', $OUTPUT_LANG);
my @TRANSLITRATE_CMD = ('./bin/translitrate.pl', '-l', $OUTPUT_LANG);

# Lookup sets of HTML tag names (tag name => 1).
my %SOFT_TAGS;
@SOFT_TAGS{qw/a b i u em font blink tt acronym/} = (1) x 9;

my %VERBATIM_TAGS;
@VERBATIM_TAGS{qw/script style/} = (1) x 2;

# For each tag, the attribute that carries a URL.
my %URL_ATTRS = (
    a      => 'href',
    img    => 'src',
    form   => 'action',
    link   => 'href',
    script => 'src',
);

# For each tag, the attributes that carry human-readable text.
my %TEXT_ATTR = (
    input => ['value'],
    img   => ['alt', 'title'],
);

# A token that ends a sentence: a run of dots, or one of ? ! : ;
my $RE_EOS_TOKEN = qr/^(?:\.+|[\?!:;])$/;
# A token made up solely of pipe and dash characters.
my $RE_SPLIT_TOKEN = qr!^[\|\-]+$!;

# Input segments, filled in once the request has been parsed.
my @segments;

# The script runs in taint mode (-T); helper programs are addressed by
# explicit relative paths, so the search path is emptied.
$ENV{PATH} = '';


my ($buffer, @pairs, $pair, $name, $value, %FORM);

# Fetch the raw, still URL-encoded form data: the request body for a POST,
# the query string for any other method. Missing CGI environment variables
# are treated as "no data" rather than tripping uninitialized-value warnings.
$ENV{'REQUEST_METHOD'} =~ tr/a-z/A-Z/ if defined $ENV{'REQUEST_METHOD'};
if (defined $ENV{'REQUEST_METHOD'} && $ENV{'REQUEST_METHOD'} eq "POST")
{
    my $content_length = $ENV{'CONTENT_LENGTH'};
    $content_length = 0
        unless defined $content_length && $content_length =~ /^\d+$/;
    read(STDIN, $buffer, $content_length);
}
else
{
    $buffer = $ENV{'QUERY_STRING'};
}
$buffer = '' unless defined $buffer;

# Split the data into name/value pairs and URL-decode both halves.
@pairs = split(/&/, $buffer);
foreach $pair (@pairs)
{
    # Limit of 2: only the first '=' separates the name from the value, so
    # a literal '=' inside the value is kept.
    ($name, $value) = split(/=/, $pair, 2);
    next unless defined $name && length $name;    # skips empty pairs ("a=1&&b=2")
    $value = '' unless defined $value;            # "name" given without "=value"

    foreach my $field ($name, $value)
    {
        $field =~ tr/+/ /;
        # Only genuine %XX escapes are decoded; a stray '%' is left alone.
        $field =~ s/%([0-9A-Fa-f]{2})/pack("C", hex($1))/eg;
    }
    $FORM{$name} = $value;
}

# Text to translate; an absent "txt" parameter is treated as empty input.
my $txt = defined $FORM{txt} ? $FORM{txt} : '';
my $url = '';
my $output_line = '';
my $output = '';
my $buf_text_has_content = 0;
my $buf_text = '';
my @buf_tag_index;
my @buf_tag_stack;
my $output_text;
my $in_verbatim = 0;

# Job queue shared between the worker threads: one entry per input segment.
my @input :shared = ();
$txt = decode_utf8 ($txt);

# Segments are delimited by the danda. Pieces holding nothing but whitespace
# (typically the line break after the final danda) are dropped so they do
# not become empty translation jobs that add a stray danda to the output.
@segments = grep { /\S/ } split(/।/, $txt);

push (@input, @segments);

# Per-segment results, the index of the next unclaimed job, and a progress
# counter; all three are shared between the threads.
my @output :shared = ();
my $next_job_i :shared = 0;
my $num_printed :shared = 0;

#--------------------------------------------------------------------------------------------------------------------------------------
# Worker body. Each worker owns its helper subprocesses and its connection
# to one Moses server (selected by $moses_i, an index into @MOSES_ADDRESSES),
# and keeps claiming segments from the shared @input queue until none are
# left. Results are stored in the shared @output array at the same index as
# the input segment, so the original order can be restored afterwards.
#
# The worker deliberately prints nothing. Variables that are not marked
# :shared are private copies inside each thread, so only the main thread
# sees the complete result set once all workers have been joined.
my $thread_body = sub
{
    my ($moses_i) = @_;

    # each thread uses its own tokenizer, detokenizer and transliteration
    # subprocess (FIXME -- isn't this hugely inefficient?)
    my $tokenizer    = Subprocess->new(@TOKENIZER_CMD);
    my $detokenizer  = Subprocess->new(@DETOKENIZER_CMD);
    my $translitrate = Subprocess->new(@TRANSLITRATE_CMD);
    $tokenizer->start;
    $detokenizer->start;
    $translitrate->start;

    # each thread also connects to its own Moses server
    my ($host, $port) = split /:/, $MOSES_ADDRESSES[$moses_i];
    my $moses = RemoteProcess->new($host, $port)
        or die "Can't connect to '$host:$port'";
    $moses->start;

    for (;;)
    {
        # Snatch the next unassigned job from the queue
        my $job_i;
        {
            lock($next_job_i);
            $job_i = $next_job_i++;
        }
        last if $job_i > $#input;

        # Skip a job that already has a result
        next if defined $output[$job_i];

        $output[$job_i] = translate_text_with_placeholders(
            $input[$job_i], $moses, $tokenizer, $detokenizer, $translitrate);
    }
    return;
};

# Never start more workers than there are segments to translate.
my $num_workers = @MOSES_ADDRESSES < @input ? @MOSES_ADDRESSES : @input;

if ($num_workers == 1)
{
    # With a single worker there's no point in forking a thread and waiting
    # for it to complete, so we just run the worker code in the main thread
    $thread_body->(0);
}
elsif ($num_workers > 1)
{
    # Start all threads and wait for them all to finish
    my @threads;
    push @threads, threads->create($thread_body, $_)
        for 0 .. $num_workers - 1;
    $_->join foreach @threads;
}

# Assemble the translated text in input order from the shared results and
# emit the page exactly once. A segment whose worker failed is rendered as
# an empty line.
$output = join '', map { (defined $_ ? $_ : '') . "।\n" } @output;
print1();

#------------------------------------------------------------------------------
# Translate one input segment.
#
# Parameters:
#   $input_text    - the segment to translate
#   $moses         - decoder connection, handed on to _translate_text_moses
#   $tokenizer, $detokenizer, $translitrate
#                  - helper objects; each is used through do_line()
#
# Returns the detokenized translation with all |N-M| trace markers removed.
#
# Side effects: stores intermediate results in the file-level $output_text
# and $output_line, and appends the returned line plus "।\n" to the
# file-level $output.
sub translate_text_with_placeholders
{
    my ($input_text, $moses, $tokenizer, $detokenizer, $translitrate) = @_;

    my @words  = split /\s+/, $tokenizer->do_line($input_text);
    my $traced = '';

    # The whole token list is sent to the decoder as a single sentence.
    if (@words)
    {
        # NOTE(review): the joined tokens pass through the tokenizer a
        # second time here -- confirm that this is intended.
        my $sentence = $tokenizer->do_line(join(' ', @words));
        my $reply    = _translate_text_moses($sentence, $moses);

        # With only one sentence the token offset is always zero; the
        # rewrite still normalises the spacing around each trace marker.
        $reply =~ s{\s*\|(\d+)-(\d+)\|}{' |' . ($1 + 0) . '-' . ($2 + 0) . '| '}ge;

        $traced = $reply . ' ';
    }

    # Post-process the decoder output, then drop the trace markers.
    $output_text = $translitrate->do_line($traced);
    $output_line = $detokenizer->do_line($output_text);
    $output_line =~ s{\s*\|(\d+)-(\d+)\|}//g;

    $output .= $output_line . "।\n";
    return $output_line;
}

# Send one line of text to the decoder and return its traced output.
#
# Parameters:
#   $text  - the text to translate
#   $moses - object whose do_line($text) method returns the decoder's reply
#
# Returns the decoder's reply. When the reply is undefined or blank, the
# source text is returned instead, followed by a fake trace " |0-N|" that
# spans all of its tokens (N is the index of the last token). Text without
# any tokens is returned unchanged, because no valid trace exists for it.
sub _translate_text_moses
{
    my ($text, $moses) = @_;

    my $traced_text = $moses->do_line($text);
    return $traced_text
        if defined $traced_text && $traced_text =~ /\S/;

    # No usable reply, which most likely indicates something is quite wrong.
    # split ' ' ignores leading whitespace, so the token count is not
    # inflated by an empty leading field.
    my @tokens = split ' ', $text;
    return $text unless @tokens;

    return $text . " |0-$#tokens|";
}
# Print the HTTP header and the result page.
#
# Takes no arguments. Reads the file-level $txt (the submitted text) and
# $output (the translated text) and shows them in two text areas.
#
# Both values are HTML-escaped before being embedded in the page: $txt comes
# straight from the request, and unescaped input containing "</textarea>"
# could inject arbitrary markup or script. Only the markup-significant ASCII
# characters are escaped, so non-ASCII text passes through untouched.
#
# The page is written as a single document. An additional
# CGI->start_html call would emit a second <html>/<head> block.
sub print1
{
    my $unsafe_chars = '<>&"\'';

    my $source_html = defined $txt ? $txt : '';
    $source_html = encode_entities($source_html, $unsafe_chars);

    my $target_html = defined $output ? $output : '';
    $target_html = encode_entities($target_html, $unsafe_chars);

    print "Content-Type: text/html; charset=UTF-8\n\n";

    print
    "<html>\n" .
    "  <head>\n" .
    "    <title>Anuvad</title>\n" .
    "    <style>\n" .
    "      p, a, b, body {\n" .
    "        font-family: Arial;\n" .
    "        font-size: 11pt;\n" .
    "      }\n" .
    "    </style>\n" .
    "  </head>\n" .
    "  <body>\n";
    print "<img src='../../images/logo1.png' alt='Powered by Perl' />\n";
    print " <table>";
    print "<tr>";
    print "<th>Type Your Hindi Text Here: </th>";
    print "<th></th>";
    print "<th>Translated Text in Punjabi:</th>";
    print "</tr>";
    print "<tr>";
    print "<td> <textarea name='txt' charset='utf8' rows = '16' cols ='46'>$source_html</textarea></td>";
    print "<td align='center' >  <INPUT Type='button' VALUE='Go Back' onClick='history.go(-1);return true;'></td>";
    print "<td><textarea name='txt1' charset='utf8' rows = '16' cols ='46'>$target_html</textarea></td>";
    print "</tr>";
    print "</table>";
    print "<hr>";
    print "<font face = 'Arial' size = '1'>|Development Team: Dr. Gurpreet Singh Lehal &amp; Dr. Vishal Goyal (Punjabi University Patiala), Mr. Ajit Kumar (Multani Mal Modi College, Patiala)| </font>";
    print "<font face = 'Arial' size = '1'> |Uses: Moses &copy; Decoder| </font>";
    print "</body>\n";
    print "</html>\n";
    return;
}
