packages: irssi-scripts/gtrans.pl (NEW) - include in pld cvs
glen
glen at pld-linux.org
Fri Oct 30 14:23:39 CET 2009
Author: glen Date: Fri Oct 30 13:23:39 2009 GMT
Module: packages Tag: HEAD
---- Log message:
- include in pld cvs
---- Files affected:
packages/irssi-scripts:
gtrans.pl (NONE -> 1.1) (NEW)
---- Diffs:
================================================================
Index: packages/irssi-scripts/gtrans.pl
diff -u /dev/null packages/irssi-scripts/gtrans.pl:1.1
--- /dev/null Fri Oct 30 14:23:39 2009
+++ packages/irssi-scripts/gtrans.pl Fri Oct 30 14:23:34 2009
@@ -0,0 +1,681 @@
+#!/usr/bin/env perl -w
+# vim: set sw=2 ts=2 sta et:
+
+# GTrans: Automatic translation in Irssi using the Google Language API
+# by Sven Ulland <svensven at gmail.com>. License: GPLv2
+#
+# DOCUMENTATION
+# -------------
+# Introduction:
+# This script brings the power of the Google Language API to Irssi.
+# In short, it provides a quick way to translate incoming and
+# outgoing IRC messages with minimal effort. While the result is
+# far from professional quality, it is vastly superior to most other
+# automatic translation engines.
+#
+# Prerequisites:
+# Better results are achieved if you write properly.
+#
+# Only UTF-8 text is supported. Make sure your terminal handles it.
+#
+# The WebService::Google::Language Perl module is required for the
+# script to work. It is unlikely that your system provides binary
+# packages for this module, so you probably have to install it
+# manually or through the CPAN shell:
+#
+# $ perl -MCPAN -e "install WebService::Google::Language"
+#
+# Quick testing:
+# To quickly test the script to see what it can do, you can run the
+# following command after starting Irssi and loading the script. It
+# will translate the text and display the result in the current
+# window. No text will be sent to IRC.
+#
+# /gtrans --test fi:this is a small test
+#
+# Another example to translate text and send it to the target
+# (channel or query) in the currently active window:
+#
+# /gtrans fi:hello! this is a small test
+#
+# Normal operation:
+# When loaded with default settings, the script does nothing. The
+# reason for this is to maintain privacy: It is not a good idea to
+# submit potentially sensitive information directly to Google.
+#
+# Automatic translation requires that the channel or nick that sends
+# or receives the message, is in a whitelist. The following scenario
+# will enable automatic translation for the channel #mychan and nick
+# 'james':
+#
+# /set gtrans_my_lang en
+# /set gtrans_input_auto ON
+# /set gtrans_output_auto 2
+# /set gtrans_output_auto_lang fi
+# /set gtrans_whitelist #mychan james
+#
+# Incoming or outgoing messages on the #mychan channel and queries
+# from/to james will now be automatically translated: Incoming
+# messages will be translated from any language to English; outgoing
+# messages will be translated from any language to Finnish.
+#
+# Settings:
+# The available settings are described below. The default value is
+# shown in parentheses.
+#
+# gtrans_input_auto (ON)
+# ON: Translate incoming messages that match gtrans_whitelist.
+# Translate to the language specified by gtrans_my_lang.
+# OFF: Don't translate incoming messages.
+#
+# gtrans_show_orig (ON)
+# ON: Show the original, untranslated message, and display the
+# translation on the next line. Applies to both incoming and
+# outgoing messages.
+# OFF: Translate messages transparently, hide original text.
+#
+# gtrans_output_auto (1)
+# 0: Don't translate outgoing messages.
+# 1: Translate outgoing messages only when the text is prefixed
+# by "<lang>:". Example: fi:this is a small test. This will
+# override the whitelist.
+# 2: Translate outgoing messages automatically to the language
+# specified by gtrans_output_auto_lang. Target has to match
+# the whitelist.
+#
+# gtrans_output_auto_lang ("fi")
+# xx: Set automatic output language to "xx". This applies to
+# automatically translated outgoing messages when
+# gtrans_output_auto is set to 2.
+#
+# gtrans_my_lang ("en")
+# xx: Space-separated list of languages that should not be
+# translated. Incoming messages will be translated to the
+# first language in this list. Note: The language will be
+# detected by sending the message to the Google API.
+#
+# gtrans_debug (0)
+# 0: No debugging.
+# 1: Light debugging. Useful to see what's going on.
+# 2: Normal debugging. Slightly more verbose.
+# 3: Medium debugging. Useful for troubleshooting.
+# 4: Verbose debugging. Significant output.
+# 5: Very verbose debugging. Lots of output.
+#
+# gtrans_whitelist ("")
+# xx: Space-separated list of channels and nicks that can be
+# translated. This applies to both incoming and outgoing
+# messages. Specify "*" to whitelist everything.
+#
+# Links / more info:
+# List of supported languages in the Google Language API:
+# <URL:http://code.google.com/apis/ajaxlanguage/documentation/reference.html#LangNameArray>
+#
+# WebService::Google::Language Perl module at CPAN:
+# <URL:http://search.cpan.org/~hma/WebService-Google-Language-0.02/lib/WebService/Google/Language.pm>
+#
+# TODO list:
+# * What determines the value of isreliable? The API doesn't say.
+# * Translate incoming/outgoing notices.
+# * Translate incoming/outgoing topics.
+# + Keep un-/translated topic in topic bar with a toggle.
+# * Make debugging levels and messages more consistent.
+# * Make whitelist work with servers/connections too.
+# * Interact better with logging.
+# * Better code reuse. Lots of duplication now.
+# * Verify compatibility with other scripts/themes/configurations.
+#
+
+use strict;
+
+use vars qw($VERSION %IRSSI);
+use Irssi;
+$VERSION = "0.0.1";
+%IRSSI = (
+ authors => "Sven Ulland",
+ contact => "svensven\@gmail.com",
+ name => "GTrans",
+ description => "Translation via the Google Language API",
+ license => "GPLv2",
+ url => "http://scripts.irssi.org/",
+ changed => $VERSION,
+ modules => "WebService::Google::Language",
+ commands => "gtrans"
+);
+
+use Data::Dumper qw(Dumper);
+use WebService::Google::Language;
+
+my $service = WebService::Google::Language->new(
+ "referer" => "http://scripts.irssi.org/",
+ "agent" => "$IRSSI{name} $VERSION for Irssi",
+ "timeout" => 5,
+ "src" => "",
+ "dest" => "",
+);
+
+# Urgh. $glob_cmdpass is set to 1 when using gtrans_cmd() and later
+# checked in event_output_msg(). The reason is that event_output_msg()
+# is called twice: first by cmd_gtrans(), then by the event "send
+# text".
+my $glob_cmdpass = 0;
+
+sub dbg {
+ my ($level, $msg) = @_;
+ return unless ($level <= Irssi::settings_get_int("gtrans_debug"));
+
+ my %dbgcol = (
+ 1 => "%G",
+ 2 => "%Y",
+ 3 => "%C",
+ 4 => "%M",
+ 5 => "%R",
+ );
+
+ print CLIENTCRAP "%W$IRSSI{name} " .
+ "%Bdebug%W($dbgcol{$level}$level%W)>%n $msg";
+}
+
+sub err {
+ my $msg = shift;
+ print CLIENTCRAP "%W$IRSSI{name} %Rerror%W>%n $msg";
+}
+
+sub inf {
+ my $msg = shift;
+ print CLIENTCRAP "%W$IRSSI{name} %Ginfo%W>%n $msg";
+}
+
+sub usage {
+ print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " .
+ "/$IRSSI{commands} [-t|--test] <lang>:<message>";
+ print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " .
+ "Example: %W/$IRSSI{commands} fr:this message " .
+ "will be translated to french and sent to the " .
+ "currently active window.%n";
+ print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " .
+ "Example: %W/$IRSSI{commands} -t fi:this " .
+ "message will be translated to finnish, but " .
+ "*won't* be sent out. use this to test " .
+ "translations.%n";
+ print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " .
+ "There are several settings to modify " .
+ "translation behaviour. Type %W/set gtrans%n to " .
+ "see the available settings. See the script " .
+ "source for documentation.";
+}
+
+sub dehtml {
+ # FIXME: The only HTML entity seen so far is '
+ $_[0] =~ s/'/'/g;
+}
+
+sub wgl_process {
+ my %args = @_;
+ dbg(5, "wgl_process(): input %args: " . Dumper(\%args));
+
+ my $result = $args{func}(%args);
+ dbg(4, "wgl_process() wgl_func() output: " . Dumper(\$result));
+
+ my $ok = 1;
+ if ($result->error) {
+ err(sprintf "wgl_process() wgl_func() code %s: %s",
+ $result->code,
+ $result->message);
+ $ok = 0;
+ }
+
+ return $result;
+}
+
+sub event_input_msg {
+ my $subname = "event_input_msg";
+ my ($server, $msg, $nick, $address, $target) = @_;
+
+ return unless Irssi::settings_get_bool("gtrans_input_auto");
+
+ my $sig = Irssi::signal_get_emitted();
+ my $witem;
+
+ dbg(5, "$subname() args: " . Dumper(\@_));
+
+ my $do_translation = 0;
+
+ if ($sig eq "message private") {
+ # Private message.
+ $witem = Irssi::window_item_find($nick);
+
+ # Check whether the source $nick is in the whitelist.
+ dbg(3, "$subname() Looking for nick \"$nick\" in whitelist");
+ foreach (split(/ /,
+ Irssi::settings_get_str("gtrans_whitelist"))) {
+ $do_translation = 1 if ($nick eq $_ or $_ eq "*");
+ }
+ } else { # $sig eq "message public"
+ # Public message.
+ $witem = Irssi::window_item_find($target);
+
+ # Check whether $target is in the whitelist.
+ dbg(3, "$subname() Looking for channel \"$target\" " .
+ "in whitelist");
+ foreach (split(/ /,
+ Irssi::settings_get_str("gtrans_whitelist"))) {
+ $do_translation = 1 if ($target eq $_ or $_ eq "*");
+ }
+ }
+
+ unless ($do_translation) {
+ dbg(1, sprintf "Channel (\"$target\") or nick (\"$nick\") is " .
+ "not whitelisted");
+ return;
+ }
+
+ dbg(2, sprintf "$subname() Channel (\"$target\") or nick " .
+ "(\"$nick\") is whitelisted");
+
+ # Prepare arguments for language detection.
+ utf8::decode($msg);
+ my %args = (
+ "func" => sub { $service->detect(@_) },
+ "text" => $msg,
+ );
+
+ # Run language detection.
+ my $result = wgl_process(%args);
+
+ dbg(4, "$subname() wgl_process() detect returned: " .
+ Dumper(\$result));
+
+ if ($result->error) {
+ dbg(1, "$subname(): Language detection failed");
+ err(sprintf "Language detection failed with code %s: %s",
+ $result->code, $result->message);
+ return;
+ }
+
+ # Don't translate my languages.
+ foreach (split(/ /, Irssi::settings_get_str("gtrans_my_lang"))) {
+ $do_translation = 0 if($result->language eq $_);
+ }
+
+ unless ($do_translation) {
+ dbg(2, "$subname() Incoming language " .
+ "\"$result->language\" matches my lang(s). " .
+ "Not translating.");
+ return;
+ }
+
+ dbg(1, sprintf "Detected language \"%s\", confidence %.3f",
+ $result->language, $result->confidence);
+
+ my $confidence = $result->confidence;
+ my $reliable = $result->is_reliable;
+
+ # Prepare arguments for translation.
+ my %args = (
+ "func" => sub { $service->translate(@_) },
+ "text" => $msg,
+ "dest" => (split(/ /,
+ Irssi::settings_get_str("gtrans_my_lang")))[0]
+ );
+
+ # Run translation.
+ my $result = wgl_process(%args);
+
+ dbg(4, "$subname() wgl_process() translate returned: " .
+ Dumper(\$result));
+
+ if ($result->error) {
+ dbg(1, "Translation failed");
+ err(sprintf "Translation failed with code %s: %s",
+ $result->code, $result->message);
+ return;
+ }
+
+ if (Irssi::settings_get_bool("gtrans_show_orig")) {
+ my $trmsg = sprintf "[%%B%s%%n:%s%.2f%%n] %s",
+ $result->language,
+ $reliable ? "%g" : "%r",
+ $confidence,
+ $result->translation;
+ utf8::decode($trmsg);
+ dehtml($trmsg);
+
+ Irssi::signal_continue($server, $msg, $nick, $address, $target);
+ $witem->print($trmsg, MSGLEVEL_CLIENTCRAP);
+ }
+ else {
+ $msg = sprintf "[%s:%.2f] %s",
+ $result->language,
+ $confidence,
+ $result->translation;
+ utf8::decode($msg);
+ dehtml($msg);
+
+ Irssi::signal_continue($server, $msg, $nick, $address, $target);
+ }
+
+ dbg(1, "Incoming translation successful");
+}
+
+sub event_output_msg {
+ my $subname = "event_output_msg";
+ my ($msg, $server, $witem, $force_lang) = @_;
+
+ dbg(5, "$subname() args: " . Dumper(\@_));
+
+ # Safeguard to stop double translations when using /gtrans.
+ if ($glob_cmdpass) {
+ $glob_cmdpass = 0;
+ Irssi::signal_continue($msg, $server, $witem);
+ return;
+ }
+
+ return unless (
+ (Irssi::settings_get_int("gtrans_output_auto") > 0 and
+ Irssi::settings_get_int("gtrans_output_auto") <= 2)
+ or $force_lang);
+
+ # Determine destination language before doing translation.
+ my $dest_lang;
+ if($force_lang) {
+ $dest_lang = $force_lang;
+ }
+ elsif (Irssi::settings_get_int("gtrans_output_auto") eq 1) {
+ # Semiauto translation. Here we preprocess the msg to determine
+ # destination language. The WGL API cannot fetch the list of valid
+ # languages, so we simply try to see if the language is valid.
+ if ( $msg =~ /^([a-z]{2}(-[a-z]{2})?):(.*)/i) {
+ dbg(2, "$subname() dest_lang \"$1\", msg \"$3\"");
+ $dest_lang = $1;
+ $msg = $3;
+ }
+ }
+ elsif (Irssi::settings_get_int("gtrans_output_auto") eq 2) {
+ # Fully automated translation.
+ # To avoid accidents, verify that $witem->{name} is whitelisted.
+ dbg(3, "$subname() Looking for target \"" .
+ $witem->{name} . "\" in whitelist");
+
+ my $do_translation = 0;
+ foreach (split(/ /,
+ Irssi::settings_get_str("gtrans_whitelist"))) {
+ $do_translation = 1 if ($witem->{name} eq $_);
+ $do_translation = 1 if ($_ eq "*");
+ }
+
+ unless ($do_translation) {
+ dbg(1, sprintf "Target \"" . $witem->{name} . "\" is " .
+ "not whitelisted");
+ return;
+ }
+
+ dbg(2, sprintf "$subname() Target \"" . $witem->{name} .
+ "\" is whitelisted");
+ $dest_lang = Irssi::settings_get_str("gtrans_output_auto_lang");
+ }
+
+ unless ($dest_lang and $msg) {
+ dbg(1, "Empty destination language or message");
+ return;
+ }
+
+ # Prepare arguments for translation.
+ utf8::decode($msg);
+ my %args = (
+ "func" => sub { $service->translate(@_) },
+ "text" => $msg,
+ "dest" => $dest_lang
+ );
+
+ # Run translation.
+ my $result = wgl_process(%args);
+
+ dbg(4, "$subname() wgl_process() output: " .
+ Dumper(\$result));
+
+ if ($result->error) {
+ dbg(1, "$subname() Translation failed");
+ err(sprintf "Translation failed with code %s: %s",
+ $result->code, $result->message);
+ return;
+ }
+
+ my $trmsg;
+ if ($result->language ne $dest_lang) {
+ $trmsg = $result->translation;
+ utf8::decode($trmsg);
+ dehtml($trmsg);
+ }
+
+ if($force_lang) {
+ # Emit new signal, since we came from cmd_gtrans().
+ $glob_cmdpass = 1; # Don't translate in event_output_msg()
+ dbg(3, "$subname():" . __LINE__ .
+ " Emitting \"send text\" signal");
+ Irssi::signal_emit("send text", $trmsg, $server, $witem);
+ return;
+ }
+
+ Irssi::signal_continue($trmsg, $server, $witem);
+
+ if (Irssi::settings_get_bool("gtrans_show_orig")) {
+ my $origmsg = sprintf "[orig:%%B%s%%n] %s",
+ $result->language,
+ $msg;
+ $witem->print($origmsg, MSGLEVEL_CLIENTCRAP);
+ }
+
+ dbg(1, "Outbound auto-translation successful");
+}
+
+# FIXME: While topic translation is implemented, it needs more work to
+# be useful. Until it is, the code is not active.
+#sub event_topic {
+# # signal "message own_public" parameters:
+# # my ($server, $channel, $topic, $nick, $target) = @_;
+#
+# return unless Irssi::settings_get_bool("gtrans_topic_auto");
+#
+# dbg(5, "event_topic() args: " . Dumper(\@_));
+#
+# my ($server, $channel, $msg, $nick, $target) = @_;
+#
+# my $do_translation = 0;
+#
+# # Check whether $channel is in the whitelist.
+# dbg(3, "event_topic() Looking for channel \"$channel\" in " .
+# "whitelist");
+# foreach (split(/ /,
+# Irssi::settings_get_str("gtrans_whitelist"))) {
+# $do_translation = 1 if ($channel eq $_);
+# $do_translation = 1 if ($_ eq "*");
+# }
+#
+# unless ($do_translation) {
+# dbg(1, sprintf "Channel $channel is not whitelisted. " .
+# "Not translating topic");
+# return;
+# }
+#
+# dbg(2, sprintf "event_topic() Channel $channel is whitelisted");
+#
+# # Prepare arguments for language detection.
+# utf8::decode($msg);
+# my %args = (
+# "func" => sub { $service->detect(@_) },
+# "text" => $msg,
+# );
+#
+# # Run language detection.
+# my $result = wgl_process(%args);
+#
+# dbg(4, "event_topic() wgl_process() detect returned: " .
+# Dumper(\$result));
+#
+# if ($result->error) {
+# dbg(1, "event_topic(): Language detection failed");
+# err(sprintf "Language detection failed with code %s: %s",
+# $result->code, $result->message);
+# return;
+# }
+#
+# # Don't translate my languages.
+# foreach (split(/ /, Irssi::settings_get_str("gtrans_my_lang"))) {
+# $do_translation = 0 if($result->language eq $_);
+# }
+#
+# unless ($do_translation) {
+# dbg(2, "event_topic() Incoming language " .
+# "\"$result->language\" matches my lang(s). " .
+# "Not translating.");
+# return;
+# }
+#
+# dbg(1, sprintf "Detected language \"%s\", confidence %.3f",
+# $result->language, $result->confidence);
+#
+# my $confidence = $result->confidence;
+#
+# # Prepare arguments for translation.
+# my %args = (
+# "func" => sub { $service->translate(@_) },
+# "text" => $msg,
+# "dest" => (split(/ /,
+# Irssi::settings_get_str("gtrans_my_lang")))[0]
+# );
+#
+# # Run translation.
+# my $result = wgl_process(%args);
+#
+# dbg(4, "event_topic() wgl_process() translate returned: " .
+# Dumper(\$result));
+#
+# if ($result->error) {
+# dbg(1, "Topic translation failed");
+# err(sprintf "Topic translation failed with code %s: %s",
+# $result->code, $result->message);
+# return;
+# }
+#
+# # FIXME: Don't alter messages!
+# $msg = sprintf "[%s:%.2f] %s",
+# $result->language, $confidence, $result->translation;
+#
+# utf8::decode($msg);
+# dehtml($msg);
+#
+# # FIXME: More info about result?
+# dbg(1, "Incoming topic translation successful");
+#
+# Irssi::signal_continue($server, $channel, $msg, $nick, $target);
+#}
+
+sub cmd_gtrans {
+ my $subname = "cmd_gtrans";
+ my ($msg, $server, $witem) = @_;
+
+ dbg(5, "$subname() input: " . Dumper(\@_));
+
+ if ($msg =~ /^(|help|-h|--help|-t|--test)$/) {
+ usage();
+ return;
+ }
+
+ my $testing_mode = 0;
+ if ($msg =~ /^(-t|--test) /) {
+ $testing_mode = 1;
+ $msg =~ s/^(-t|--test) //;
+ }
+
+ return unless ($testing_mode or
<<Diff was trimmed, longer than 597 lines>>
More information about the pld-cvs-commit
mailing list