Author: glen Date: Fri Oct 30 13:23:39 2009 GMT Module: packages Tag: HEAD ---- Log message: - include in pld cvs
---- Files affected: packages/irssi-scripts: gtrans.pl (NONE -> 1.1) (NEW) ---- Diffs: ================================================================ Index: packages/irssi-scripts/gtrans.pl diff -u /dev/null packages/irssi-scripts/gtrans.pl:1.1 --- /dev/null Fri Oct 30 14:23:39 2009 +++ packages/irssi-scripts/gtrans.pl Fri Oct 30 14:23:34 2009 @@ -0,0 +1,681 @@ +#!/usr/bin/env perl -w +# vim: set sw=2 ts=2 sta et: + +# GTrans: Automatic translation in Irssi using the Google Language API +# by Sven Ulland <[email protected]>. License: GPLv2 +# +# DOCUMENTATION +# ------------- +# Introduction: +# This script brings the power of the Google Language API to Irssi. +# In short, it provides a quick way to translate incoming and +# outgoing IRC messages with minimal effort. While the result is +# far from professional quality, it is vastly superior to most other +# automatic translation engines. +# +# Prerequisites: +# Better results are achieved if you write properly. +# +# Only UTF-8 text is supported. Make sure your terminal handles it. +# +# The WebService::Google::Language Perl module is required for the +# script to work. It is unlikely that your system provides binary +# packages for this module, so you probably have to install it +# manually or through the CPAN shell: +# +# $ perl -MCPAN -e "install WebService::Google::Language" +# +# Quick testing: +# To quickly test the script to see what it can do, you can run the +# following command after starting Irssi and loading the script. It +# will translate the text and display the result in the current +# window. No text will be sent to IRC. +# +# /gtrans --test fi:this is a small test +# +# Another example to translate text and send it to the target +# (channel or query) in the currently active window: +# +# /gtrans fi:hello! this is a small test +# +# Normal operation: +# When loaded with default settings, the script does nothing. 
The +# reason for this is to maintain privacy: It is not a good idea to +# submit potentially sensitive information directly to Google. +# +# Automatic translation requires that the channel or nick that sends +# or receives the message, is in a whitelist. The following scenario +# will enable automatic translation for the channel #mychan and nick +# 'james': +# +# /set gtrans_my_lang en +# /set gtrans_input_auto ON +# /set gtrans_output_auto 2 +# /set gtrans_output_auto_lang fi +# /set gtrans_whitelist #mychan james +# +# Incoming or outgoing messages on the #mychan channel and queries +# from/to james will now be automatically translated: Incoming +# messages will be translated from any language to English; outgoing +# messages will be translated from any language to Finnish. +# +# Settings: +# The available settings are described below. The default value is +# shown in parentheses. +# +# gtrans_input_auto (ON) +# ON: Translate incoming messages that match gtrans_whitelist. +# Translate to the language specified by gtrans_my_lang. +# OFF: Don't translate incoming messages. +# +# gtrans_show_orig (ON) +# ON: Show the original, untranslated message, and display the +# translation on the next line. Applies to both incoming and +# outgoing messages. +# OFF: Translate messages transparently, hide original text. +# +# gtrans_output_auto (1) +# 0: Don't translate outgoing messages. +# 1: Translate outgoing messages only when the text is prefixed +# by "<lang>:". Example: fi:this is a small test. This will +# override the whitelist. +# 2: Translate outgoing messages automatically to the language +# specified by gtrans_output_auto_lang. Target has to match +# the whitelist. +# +# gtrans_output_auto_lang ("fi") +# xx: Set automatic output language to "xx". This applies to +# automatically translated outgoing messages when +# gtrans_output_auto is set to 2. +# +# gtrans_my_lang ("en") +# xx: Space-separated list of languages that should not be +# translated. 
Incoming messages will be translated to the +# first language in this list. Note: The language will be +# detected by sending the message to the Google API. +# +# gtrans_debug (0) +# 0: No debugging. +# 1: Light debugging. Useful to see what's going on. +# 2: Normal debugging. Slightly more verbose. +# 3: Medium debugging. Useful for troubleshooting. +# 4: Verbose debugging. Significant output. +# 5: Very verbose debugging. Lots of output. +# +# gtrans_whitelist ("") +# xx: Space-separated list of channels and nicks that can be +# translated. This applies to both incoming and outgoing +# messages. Specify "*" to whitelist everything. +# +# Links / more info: +# List of supported languages in the Google Language API: +# <URL:http://code.google.com/apis/ajaxlanguage/documentation/reference.html#LangNameArray> +# +# WebService::Google::Language Perl module at CPAN: +# <URL:http://search.cpan.org/~hma/WebService-Google-Language-0.02/lib/WebService/Google/Language.pm> +# +# TODO list: +# * What determines the value of isreliable? The API doesn't say. +# * Translate incoming/outgoing notices. +# * Translate incoming/outgoing topics. +# + Keep un-/translated topic in topic bar with a toggle. +# * Make debugging levels and messages more consistent. +# * Make whitelist work with servers/connections too. +# * Interact better with logging. +# * Better code reuse. Lots of duplication now. +# * Verify compatibility with other scripts/themes/configurations. 
#

use strict;
use warnings;

use vars qw($VERSION %IRSSI);
use Irssi;
$VERSION = "0.0.1";
%IRSSI = (
  authors     => "Sven Ulland",
  # NOTE(review): the address was redacted by the mailing-list archive.
  # Single quotes keep '@protected' from being interpolated as an array.
  contact     => '[email protected]',
  name        => "GTrans",
  description => "Translation via the Google Language API",
  license     => "GPLv2",
  url         => "http://scripts.irssi.org/",
  changed     => $VERSION,
  modules     => "WebService::Google::Language",
  commands    => "gtrans"
);

use Data::Dumper qw(Dumper);
use WebService::Google::Language;

# Shared client object used for both language detection and translation.
my $service = WebService::Google::Language->new(
  "referer" => "http://scripts.irssi.org/",
  "agent"   => "$IRSSI{name} $VERSION for Irssi",
  "timeout" => 5,
  "src"     => "",
  "dest"    => "",
);

# Urgh. $glob_cmdpass is set to 1 when using gtrans_cmd() and later
# checked in event_output_msg(). The reason is that event_output_msg()
# is called twice: first by cmd_gtrans(), then by the event "send
# text".
my $glob_cmdpass = 0;

# dbg($level, $msg): print a debug line, but only when $level does not
# exceed the gtrans_debug setting. Each level gets its own colour.
sub dbg {
  my ($level, $msg) = @_;
  return unless ($level <= Irssi::settings_get_int("gtrans_debug"));

  my %dbgcol = (
    1 => "%G",
    2 => "%Y",
    3 => "%C",
    4 => "%M",
    5 => "%R",
  );
  # Unknown levels fall back to the default colour instead of undef.
  my $col = exists $dbgcol{$level} ? $dbgcol{$level} : "%n";

  print CLIENTCRAP "%W$IRSSI{name} " .
                   "%Bdebug%W($col$level%W)>%n $msg";
}

# err($msg): print an error line.
sub err {
  my $msg = shift;
  print CLIENTCRAP "%W$IRSSI{name} %Rerror%W>%n $msg";
}

# inf($msg): print an informational line.
sub inf {
  my $msg = shift;
  print CLIENTCRAP "%W$IRSSI{name} %Ginfo%W>%n $msg";
}

# usage(): print the command synopsis and two examples.
sub usage {
  print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " .
                   "/$IRSSI{commands} [-t|--test] <lang>:<message>";
  print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " .
                   "Example: %W/$IRSSI{commands} fr:this message " .
                   "will be translated to french and sent to the " .
                   "currently active window.%n";
  print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " .
                   "Example: %W/$IRSSI{commands} -t fi:this " .
                   "message will be translated to finnish, but " .
                   "*won't* be sent out. use this to test " .
                   "translations.%n";
  print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " .
                   "There are several settings to modify " .
                   "translation behaviour. Type %W/set gtrans%n to " .
                   "see the available settings. See the script " .
                   "source for documentation.";
}

# dehtml($text): decode HTML entities in place (the argument itself is
# modified through @_). Handles numeric entities such as &#39; and the
# basic named ones. A single pass is used so that text like "&amp;lt;"
# is decoded exactly once.
sub dehtml {
  return unless defined $_[0];

  my %entity = (
    quot => '"',
    amp  => '&',
    lt   => '<',
    gt   => '>',
    apos => "'",
  );

  $_[0] =~ s{&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(quot|amp|lt|gt|apos));}
            {defined $1 ? chr($1) : defined $2 ? chr(hex($2)) : $entity{$3}}ge;
  return;
}

# fmt_escape($text): return a copy of $text with every '%' doubled, so
# that text coming from IRC or from the API is not interpreted as
# format codes when it is handed to a print() call.
sub fmt_escape {
  my ($text) = @_;
  return "" unless defined $text;
  $text =~ s/%/%%/g;
  return $text;
}

# is_whitelisted($name): true when $name is listed in gtrans_whitelist
# or when the whitelist contains "*". An undefined $name can only match
# through "*".
sub is_whitelisted {
  my ($name) = @_;

  foreach my $entry (split(' ',
                     Irssi::settings_get_str("gtrans_whitelist"))) {
    return 1 if ($entry eq "*");
    return 1 if (defined $name and $entry eq $name);
  }
  return 0;
}

# wgl_process(func => $coderef, ...): call $coderef with the complete
# argument list and return its result object. Errors are reported via
# err() but the result is returned regardless, so callers must still
# check $result->error themselves.
sub wgl_process {
  my %args = @_;
  dbg(5, "wgl_process(): input %args: " . Dumper(\%args));

  my $result = $args{func}->(%args);
  dbg(4, "wgl_process() wgl_func() output: " . Dumper(\$result));

  if ($result->error) {
    err(sprintf "wgl_process() wgl_func() code %s: %s",
                $result->code,
                $result->message);
  }

  return $result;
}

# event_input_msg($server, $msg, $nick, $address, $target): handler for
# incoming messages. When gtrans_input_auto is on and the source is
# whitelisted, the language is detected; unless it is one of
# gtrans_my_lang, the text is translated to the first language of that
# list. Depending on gtrans_show_orig the translation is printed below
# the original or replaces it.
sub event_input_msg {
  my $subname = "event_input_msg";
  my ($server, $msg, $nick, $address, $target) = @_;

  return unless Irssi::settings_get_bool("gtrans_input_auto");

  my $sig        = Irssi::signal_get_emitted();
  my $is_private = ($sig eq "message private");

  dbg(5, "$subname() args: " . Dumper(\@_));

  # $target is not set for private messages; keep a printable copy and
  # only pass it on when the signal actually carried it.
  my $target_str = defined $target ? $target : "";
  my @tail       = defined $target ? ($target) : ();

  my $witem;
  my $do_translation;

  if ($is_private) {
    # Private message.
    $witem = Irssi::window_item_find($nick);

    # Check whether the source $nick is in the whitelist.
    dbg(3, "$subname() Looking for nick \"$nick\" in whitelist");
    $do_translation = is_whitelisted($nick);
  } else { # $sig eq "message public"
    # Public message.
    $witem = Irssi::window_item_find($target);

    # Check whether $target is in the whitelist.
    dbg(3, "$subname() Looking for channel \"$target_str\" " .
           "in whitelist");
    $do_translation = is_whitelisted($target);
  }

  unless ($do_translation) {
    dbg(1, "Channel (\"$target_str\") or nick (\"$nick\") is " .
           "not whitelisted");
    return;
  }

  dbg(2, "$subname() Channel (\"$target_str\") or nick " .
         "(\"$nick\") is whitelisted");

  # Prepare arguments for language detection.
  utf8::decode($msg);
  my %detect_args = (
    "func" => sub { $service->detect(@_) },
    "text" => $msg,
  );

  # Run language detection.
  my $detected = wgl_process(%detect_args);

  dbg(4, "$subname() wgl_process() detect returned: " .
         Dumper(\$detected));

  if ($detected->error) {
    dbg(1, "$subname(): Language detection failed");
    err(sprintf "Language detection failed with code %s: %s",
                $detected->code, $detected->message);
    return;
  }

  # Don't translate my languages.
  my @my_langs = split(' ', Irssi::settings_get_str("gtrans_my_lang"));
  my $src_lang = $detected->language;
  $src_lang = "" unless defined $src_lang;

  if (grep { $src_lang eq $_ } @my_langs) {
    dbg(2, "$subname() Incoming language " .
           "\"$src_lang\" matches my lang(s). " .
           "Not translating.");
    return;
  }

  dbg(1, sprintf "Detected language \"%s\", confidence %.3f",
                 $src_lang, $detected->confidence);

  my $confidence = $detected->confidence;
  my $reliable   = $detected->is_reliable;

  # Prepare arguments for translation.
  my %translate_args = (
    "func" => sub { $service->translate(@_) },
    "text" => $msg,
    "dest" => $my_langs[0],
  );

  # Run translation.
  my $result = wgl_process(%translate_args);

  dbg(4, "$subname() wgl_process() translate returned: " .
         Dumper(\$result));

  if ($result->error) {
    dbg(1, "Translation failed");
    err(sprintf "Translation failed with code %s: %s",
                $result->code, $result->message);
    return;
  }

  my $translation = $result->translation;
  utf8::decode($translation);
  dehtml($translation);

  if (Irssi::settings_get_bool("gtrans_show_orig")) {
    # The translation is untrusted text going through print(), so its
    # '%' characters are escaped before the colour codes are added.
    my $trmsg = sprintf "[%%B%s%%n:%s%.2f%%n] %s",
                        fmt_escape($result->language),
                        $reliable ? "%g" : "%r",
                        $confidence,
                        fmt_escape($translation);

    Irssi::signal_continue($server, $msg, $nick, $address, @tail);

    # No window item may exist yet (e.g. first message of a query).
    if (defined $witem) {
      $witem->print($trmsg, MSGLEVEL_CLIENTCRAP);
    } else {
      Irssi::print($trmsg, MSGLEVEL_CLIENTCRAP);
    }
  }
  else {
    $msg = sprintf "[%s:%.2f] %s",
                   $result->language,
                   $confidence,
                   $translation;

    Irssi::signal_continue($server, $msg, $nick, $address, @tail);
  }

  dbg(1, "Incoming translation successful");
}

# event_output_msg($msg, $server, $witem, $force_lang): handler for
# outgoing text ("send text"), also called directly by cmd_gtrans()
# with $force_lang set. The destination language comes from
# $force_lang, from a "<lang>:" prefix (gtrans_output_auto 1) or from
# gtrans_output_auto_lang for whitelisted targets (gtrans_output_auto
# 2). Returning without continuing the signal leaves the original text
# untouched.
sub event_output_msg {
  my $subname = "event_output_msg";
  my ($msg, $server, $witem, $force_lang) = @_;

  dbg(5, "$subname() args: " . Dumper(\@_));

  # Safeguard to stop double translations when using /gtrans.
  if ($glob_cmdpass) {
    $glob_cmdpass = 0;
    Irssi::signal_continue($msg, $server, $witem);
    return;
  }

  my $auto_mode = Irssi::settings_get_int("gtrans_output_auto");
  return unless (($auto_mode > 0 and $auto_mode <= 2) or $force_lang);

  # Determine destination language before doing translation.
  my $dest_lang;
  if ($force_lang) {
    $dest_lang = $force_lang;
  }
  elsif ($auto_mode == 1) {
    # Semiauto translation. Here we preprocess the msg to determine
    # destination language. The WGL API cannot fetch the list of valid
    # languages, so we simply try to see if the language is valid.
    if (my ($lang, undef, $text) =
          $msg =~ /^([a-z]{2}(-[a-z]{2})?):(.*)/i) {
      dbg(2, "$subname() dest_lang \"$lang\", msg \"$text\"");
      $dest_lang = $lang;
      $msg       = $text;
    }
  }
  elsif ($auto_mode == 2) {
    # Fully automated translation.
    # To avoid accidents, verify that $witem->{name} is whitelisted.
    my $target_name = defined $witem ? $witem->{name} : undef;
    my $target_str  = defined $target_name ? $target_name : "";

    dbg(3, "$subname() Looking for target \"" .
           $target_str . "\" in whitelist");

    unless (is_whitelisted($target_name)) {
      dbg(1, "Target \"" . $target_str . "\" is " .
             "not whitelisted");
      return;
    }

    dbg(2, "$subname() Target \"" . $target_str .
           "\" is whitelisted");
    $dest_lang = Irssi::settings_get_str("gtrans_output_auto_lang");
  }

  unless ($dest_lang and $msg) {
    dbg(1, "Empty destination language or message");
    return;
  }

  # Prepare arguments for translation.
  utf8::decode($msg);
  my %args = (
    "func" => sub { $service->translate(@_) },
    "text" => $msg,
    "dest" => $dest_lang
  );

  # Run translation.
  my $result = wgl_process(%args);

  dbg(4, "$subname() wgl_process() output: " .
         Dumper(\$result));

  if ($result->error) {
    dbg(1, "$subname() Translation failed");
    err(sprintf "Translation failed with code %s: %s",
                $result->code, $result->message);
    return;
  }

  # When the text already is in the destination language there is
  # nothing to translate: send the text itself rather than undef.
  my $src_lang   = $result->language;
  my $translated = (defined $src_lang and $src_lang ne $dest_lang);
  my $trmsg      = $msg;
  if ($translated) {
    $trmsg = $result->translation;
    utf8::decode($trmsg);
    dehtml($trmsg);
  }

  if ($force_lang) {
    # Emit new signal, since we came from cmd_gtrans().
    $glob_cmdpass = 1; # Don't translate in event_output_msg()
    dbg(3, "$subname():" . __LINE__ .
           " Emitting \"send text\" signal");
    Irssi::signal_emit("send text", $trmsg, $server, $witem);
    return;
  }

  Irssi::signal_continue($trmsg, $server, $witem);

  # Only show the original when it differs from what was sent and there
  # is a window item to print into.
  if ($translated and defined $witem
      and Irssi::settings_get_bool("gtrans_show_orig")) {
    my $origmsg = sprintf "[orig:%%B%s%%n] %s",
                          fmt_escape($src_lang),
                          fmt_escape($msg);
    $witem->print($origmsg, MSGLEVEL_CLIENTCRAP);
  }

  dbg(1, "Outbound auto-translation successful");
}

# FIXME: While topic translation is implemented, it needs more work to
# be useful. Until it is, the code is not active.
#sub event_topic {
#  # signal "message own_public" parameters:
#  # my ($server, $channel, $topic, $nick, $target) = @_;
#
#  return unless Irssi::settings_get_bool("gtrans_topic_auto");
#
#  dbg(5, "event_topic() args: " .
Dumper(\...@_)); +# +# my ($server, $channel, $msg, $nick, $target) = @_; +# +# my $do_translation = 0; +# +# # Check whether $channel is in the whitelist. +# dbg(3, "event_topic() Looking for channel \"$channel\" in " . +# "whitelist"); +# foreach (split(/ /, +# Irssi::settings_get_str("gtrans_whitelist"))) { +# $do_translation = 1 if ($channel eq $_); +# $do_translation = 1 if ($_ eq "*"); +# } +# +# unless ($do_translation) { +# dbg(1, sprintf "Channel $channel is not whitelisted. " . +# "Not translating topic"); +# return; +# } +# +# dbg(2, sprintf "event_topic() Channel $channel is whitelisted"); +# +# # Prepare arguments for language detection. +# utf8::decode($msg); +# my %args = ( +# "func" => sub { $service->detect(@_) }, +# "text" => $msg, +# ); +# +# # Run language detection. +# my $result = wgl_process(%args); +# +# dbg(4, "event_topic() wgl_process() detect returned: " . +# Dumper(\$result)); +# +# if ($result->error) { +# dbg(1, "event_topic(): Language detection failed"); +# err(sprintf "Language detection failed with code %s: %s", +# $result->code, $result->message); +# return; +# } +# +# # Don't translate my languages. +# foreach (split(/ /, Irssi::settings_get_str("gtrans_my_lang"))) { +# $do_translation = 0 if($result->language eq $_); +# } +# +# unless ($do_translation) { +# dbg(2, "event_topic() Incoming language " . +# "\"$result->language\" matches my lang(s). " . +# "Not translating."); +# return; +# } +# +# dbg(1, sprintf "Detected language \"%s\", confidence %.3f", +# $result->language, $result->confidence); +# +# my $confidence = $result->confidence; +# +# # Prepare arguments for translation. +# my %args = ( +# "func" => sub { $service->translate(@_) }, +# "text" => $msg, +# "dest" => (split(/ /, +# Irssi::settings_get_str("gtrans_my_lang")))[0] +# ); +# +# # Run translation. +# my $result = wgl_process(%args); +# +# dbg(4, "event_topic() wgl_process() translate returned: " . 
+# Dumper(\$result)); +# +# if ($result->error) { +# dbg(1, "Topic translation failed"); +# err(sprintf "Topic translation failed with code %s: %s", +# $result->code, $result->message); +# return; +# } +# +# # FIXME: Don't alter messages! +# $msg = sprintf "[%s:%.2f] %s", +# $result->language, $confidence, $result->translation; +# +# utf8::decode($msg); +# dehtml($msg); +# +# # FIXME: More info about result? +# dbg(1, "Incoming topic translation successful"); +# +# Irssi::signal_continue($server, $channel, $msg, $nick, $target); +#} + +sub cmd_gtrans { + my $subname = "cmd_gtrans"; + my ($msg, $server, $witem) = @_; + + dbg(5, "$subname() input: " . Dumper(\...@_)); + + if ($msg =~ /^(|help|-h|--help|-t|--test)$/) { + usage(); + return; + } + + my $testing_mode = 0; + if ($msg =~ /^(-t|--test) /) { + $testing_mode = 1; + $msg =~ s/^(-t|--test) //; + } + + return unless ($testing_mode or <<Diff was trimmed, longer than 597 lines>> _______________________________________________ pld-cvs-commit mailing list [email protected] http://lists.pld-linux.org/mailman/listinfo/pld-cvs-commit
