packages: irssi-scripts/gtrans.pl (NEW) - include in pld cvs

glen glen at pld-linux.org
Fri Oct 30 14:23:39 CET 2009


Author: glen                         Date: Fri Oct 30 13:23:39 2009 GMT
Module: packages                      Tag: HEAD
---- Log message:
- include in pld cvs

---- Files affected:
packages/irssi-scripts:
   gtrans.pl (NONE -> 1.1)  (NEW)

---- Diffs:

================================================================
Index: packages/irssi-scripts/gtrans.pl
diff -u /dev/null packages/irssi-scripts/gtrans.pl:1.1
--- /dev/null	Fri Oct 30 14:23:39 2009
+++ packages/irssi-scripts/gtrans.pl	Fri Oct 30 14:23:34 2009
@@ -0,0 +1,681 @@
+#!/usr/bin/env perl -w
+# vim: set sw=2 ts=2 sta et:
+
+# GTrans: Automatic translation in Irssi using the Google Language API
+# by Sven Ulland <svensven at gmail.com>. License: GPLv2
+#
+# DOCUMENTATION
+# -------------
+# Introduction:
+#   This script brings the power of the Google Language API to Irssi.
+#   In short, it provides a quick way to translate incoming and
+#   outgoing IRC messages with minimal effort. While the result is
+#   far from professional quality, it is vastly superior to most other
+#   automatic translation engines.
+#
+# Prerequisites:
+#   Better results are achieved if you write properly.
+#
+#   Only UTF-8 text is supported. Make sure your terminal handles it.
+#
+#   The WebService::Google::Language Perl module is required for the
+#   script to work. It is unlikely that your system provides binary
+#   packages for this module, so you probably have to install it
+#   manually or through the CPAN shell:
+#
+#     $ perl -MCPAN -e "install WebService::Google::Language"
+#
+# Quick testing:
+#   To quickly test the script to see what it can do, you can run the
+#   following command after starting Irssi and loading the script. It
+#   will translate the text and display the result in the current
+#   window. No text will be sent to IRC.
+#
+#     /gtrans --test fi:this is a small test
+#
+#   Another example to translate text and send it to the target
+#   (channel or query) in the currently active window:
+#
+#     /gtrans fi:hello! this is a small test
+#
+# Normal operation:
+#   When loaded with default settings, the script does nothing. The
+#   reason for this is to maintain privacy: It is not a good idea to
+#   submit potentially sensitive information directly to Google.
+#
+#   Automatic translation requires that the channel or nick that sends
+#   or receives the message, is in a whitelist. The following scenario
+#   will enable automatic translation for the channel #mychan and nick
+#   'james':
+#
+#     /set gtrans_my_lang en
+#     /set gtrans_input_auto ON
+#     /set gtrans_output_auto 2
+#     /set gtrans_output_auto_lang fi
+#     /set gtrans_whitelist #mychan james
+#
+#   Incoming or outgoing messages on the #mychan channel and queries
+#   from/to james will now be automatically translated: Incoming
+#   messages will be translated from any language to English; outgoing
+#   messages will be translated from any language to Finnish.
+#
+# Settings:
+#   The available settings are described below. The default value is
+#   shown in parentheses.
+#
+#   gtrans_input_auto (ON)
+#     ON:  Translate incoming messages that match gtrans_whitelist.
+#          Translate to the language specified by gtrans_my_lang.
+#     OFF: Don't translate incoming messages.
+#
+#   gtrans_show_orig (ON)
+#     ON:  Show the original, untranslated message, and display the
+#          translation on the next line. Applies to both incoming and
+#          outgoing messages.
+#     OFF: Translate messages transparently, hide original text.
+#
+#   gtrans_output_auto (1)
+#     0:   Don't translate outgoing messages.
+#     1:   Translate outgoing messages only when the text is prefixed
+#          by "<lang>:". Example:  fi:this is a small test. This will
+#          override the whitelist.
+#     2:   Translate outgoing messages automatically to the language
+#          specified by gtrans_output_auto_lang. Target has to match
+#          the whitelist.
+#
+#   gtrans_output_auto_lang ("fi")
+#     xx:  Set automatic output language to "xx". This applies to
+#          automatically translated outgoing messages when
+#          gtrans_output_auto is set to 2.
+#
+#   gtrans_my_lang ("en")
+#     xx:  Space-separated list of languages that should not be
+#          translated. Incoming messages will be translated to the
+#          first language in this list. Note: The language will be
+#          detected by sending the message to the Google API.
+#
+#   gtrans_debug (0)
+#     0:   No debugging.
+#     1:   Light debugging. Useful to see what's going on.
+#     2:   Normal debugging. Slightly more verbose.
+#     3:   Medium debugging. Useful for troubleshooting.
+#     4:   Verbose debugging. Significant output.
+#     5:   Very verbose debugging. Lots of output.
+#
+#   gtrans_whitelist ("")
+#     xx:  Space-separated list of channels and nicks that can be
+#          translated. This applies to both incoming and outgoing
+#          messages. Specify "*" to whitelist everything.
+#
+# Links / more info:
+#   List of supported languages in the Google Language API:
+#     <URL:http://code.google.com/apis/ajaxlanguage/documentation/reference.html#LangNameArray>
+#
+#   WebService::Google::Language Perl module at CPAN:
+#     <URL:http://search.cpan.org/~hma/WebService-Google-Language-0.02/lib/WebService/Google/Language.pm>
+#
+# TODO list:
+#   * What determines the value of isreliable? The API doesn't say.
+#   * Translate incoming/outgoing notices.
+#   * Translate incoming/outgoing topics.
+#     + Keep un-/translated topic in topic bar with a toggle.
+#   * Make debugging levels and messages more consistent.
+#   * Make whitelist work with servers/connections too.
+#   * Interact better with logging.
+#   * Better code reuse. Lots of duplication now.
+#   * Verify compatibility with other scripts/themes/configurations.
+#
+
+use strict;
+
+use vars qw($VERSION %IRSSI);
+use Irssi;
+$VERSION = "0.0.1";
+%IRSSI = (
+    authors     => "Sven Ulland",
+    contact     => "svensven\@gmail.com",
+    name        => "GTrans",
+    description => "Translation via the Google Language API",
+    license     => "GPLv2",
+    url         => "http://scripts.irssi.org/",
+    changed     => $VERSION,
+    modules     => "WebService::Google::Language",
+    commands    => "gtrans"
+);
+
+use Data::Dumper qw(Dumper);
+use WebService::Google::Language;
+
+my $service = WebService::Google::Language->new(
+  "referer" => "http://scripts.irssi.org/",
+  "agent"   => "$IRSSI{name} $VERSION for Irssi",
+  "timeout" => 5,
+  "src"     => "",
+  "dest"    => "",
+);
+
+# Urgh. $glob_cmdpass is set to 1 when using gtrans_cmd() and later
+# checked in event_output_msg(). The reason is that event_output_msg()
+# is called twice: first by cmd_gtrans(), then by the event "send
+# text".
+my $glob_cmdpass = 0;
+
+sub dbg {
+  my ($level, $msg) = @_;
+  return unless ($level <= Irssi::settings_get_int("gtrans_debug"));
+
+  my %dbgcol = (
+    1 => "%G",
+    2 => "%Y",
+    3 => "%C",
+    4 => "%M",
+    5 => "%R",
+  );
+
+  print CLIENTCRAP "%W$IRSSI{name} " .
+                   "%Bdebug%W($dbgcol{$level}$level%W)>%n $msg";
+}
+
+sub err {
+  my $msg = shift;
+  print CLIENTCRAP "%W$IRSSI{name} %Rerror%W>%n $msg";
+}
+
+sub inf {
+  my $msg = shift;
+  print CLIENTCRAP "%W$IRSSI{name} %Ginfo%W>%n $msg";
+}
+
+sub usage {
+  print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " .
+                   "/$IRSSI{commands} [-t|--test] <lang>:<message>";
+  print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " .
+                   "Example: %W/$IRSSI{commands} fr:this message " .
+                   "will be translated to french and sent to the " .
+                   "currently active window.%n";
+  print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " .
+                   "Example: %W/$IRSSI{commands} -t fi:this " .
+                   "message will be translated to finnish, but " .
+                   "*won't* be sent out. use this to test " .
+                   "translations.%n";
+  print CLIENTCRAP "%W$IRSSI{name} %Yusage%W>%n " .
+                   "There are several settings to modify " .
+                   "translation behaviour. Type %W/set gtrans%n to " .
+                   "see the available settings. See the script " .
+                   "source for documentation.";
+}
+
+sub dehtml {
+  # FIXME: The only HTML entity seen so far is &#39;
+  $_[0] =~ s/&#39;/'/g;
+}
+
+sub wgl_process {
+  my %args = @_;
+  dbg(5, "wgl_process(): input %args: " . Dumper(\%args));
+
+  my $result = $args{func}(%args);
+  dbg(4, "wgl_process() wgl_func() output: " . Dumper(\$result));
+
+  my $ok = 1;
+  if ($result->error) {
+    err(sprintf "wgl_process() wgl_func() code %s: %s",
+        $result->code,
+        $result->message);
+    $ok = 0;
+  }
+
+  return $result;
+}
+
+sub event_input_msg {
+  my $subname = "event_input_msg";
+  my ($server, $msg, $nick, $address, $target) = @_;
+
+  return unless Irssi::settings_get_bool("gtrans_input_auto");
+
+  my $sig = Irssi::signal_get_emitted();
+  my $witem;
+
+  dbg(5, "$subname() args: " . Dumper(\@_));
+
+  my $do_translation = 0;
+
+  if ($sig eq "message private") {
+    # Private message.
+    $witem = Irssi::window_item_find($nick);
+
+    # Check whether the source $nick is in the whitelist.
+    dbg(3, "$subname() Looking for nick \"$nick\" in whitelist");
+    foreach (split(/ /,
+        Irssi::settings_get_str("gtrans_whitelist"))) {
+      $do_translation = 1 if ($nick eq $_ or $_ eq "*");
+    }
+  } else { # $sig eq "message public"
+    # Public message.
+    $witem = Irssi::window_item_find($target);
+
+    # Check whether $target is in the whitelist.
+    dbg(3, "$subname() Looking for channel \"$target\" " .
+           "in whitelist");
+    foreach (split(/ /,
+        Irssi::settings_get_str("gtrans_whitelist"))) {
+      $do_translation = 1 if ($target eq $_ or $_ eq "*");
+    }
+  }
+
+  unless ($do_translation) {
+    dbg(1, sprintf "Channel (\"$target\") or nick (\"$nick\") is " .
+                   "not whitelisted");
+    return;
+  }
+
+  dbg(2, sprintf "$subname() Channel (\"$target\") or nick " .
+                 "(\"$nick\") is whitelisted");
+
+  # Prepare arguments for language detection.
+  utf8::decode($msg);
+  my %args = (
+    "func" => sub { $service->detect(@_) },
+    "text" => $msg,
+  );
+
+  # Run language detection.
+  my $result = wgl_process(%args);
+
+  dbg(4, "$subname() wgl_process() detect returned: " .
+         Dumper(\$result));
+
+  if ($result->error) {
+    dbg(1, "$subname(): Language detection failed");
+    err(sprintf "Language detection failed with code %s: %s",
+        $result->code, $result->message);
+    return;
+  }
+
+  # Don't translate my languages.
+  foreach (split(/ /, Irssi::settings_get_str("gtrans_my_lang"))) {
+    $do_translation = 0 if($result->language eq $_);
+  }
+
+  unless ($do_translation) {
+    dbg(2, "$subname() Incoming language " .
+           "\"$result->language\" matches my lang(s). " .
+           "Not translating.");
+    return;
+  }
+
+  dbg(1, sprintf "Detected language \"%s\", confidence %.3f",
+                 $result->language, $result->confidence);
+
+  my $confidence = $result->confidence;
+  my $reliable = $result->is_reliable;
+
+  # Prepare arguments for translation.
+  my %args = (
+    "func" => sub { $service->translate(@_) },
+    "text" => $msg,
+    "dest" => (split(/ /,
+        Irssi::settings_get_str("gtrans_my_lang")))[0]
+  );
+
+  # Run translation.
+  my $result = wgl_process(%args);
+
+  dbg(4, "$subname() wgl_process() translate returned: " .
+         Dumper(\$result));
+
+  if ($result->error) {
+    dbg(1, "Translation failed");
+    err(sprintf "Translation failed with code %s: %s",
+        $result->code, $result->message);
+    return;
+  }
+
+  if (Irssi::settings_get_bool("gtrans_show_orig")) {
+    my $trmsg = sprintf "[%%B%s%%n:%s%.2f%%n] %s",
+        $result->language,
+        $reliable ? "%g" : "%r",
+        $confidence,
+        $result->translation;
+    utf8::decode($trmsg);
+    dehtml($trmsg);
+
+    Irssi::signal_continue($server, $msg, $nick, $address, $target);
+    $witem->print($trmsg, MSGLEVEL_CLIENTCRAP);
+  }
+  else {
+    $msg = sprintf "[%s:%.2f] %s",
+        $result->language,
+        $confidence,
+        $result->translation;
+    utf8::decode($msg);
+    dehtml($msg);
+
+    Irssi::signal_continue($server, $msg, $nick, $address, $target);
+  }
+
+  dbg(1, "Incoming translation successful");
+}
+
+sub event_output_msg {
+  my $subname = "event_output_msg";
+  my ($msg, $server, $witem, $force_lang) = @_;
+
+  dbg(5, "$subname() args: " . Dumper(\@_));
+
+  # Safeguard to stop double translations when using /gtrans.
+  if ($glob_cmdpass) {
+    $glob_cmdpass = 0;
+    Irssi::signal_continue($msg, $server, $witem);
+    return;
+  }
+
+  return unless (
+      (Irssi::settings_get_int("gtrans_output_auto") > 0 and
+       Irssi::settings_get_int("gtrans_output_auto") <= 2)
+         or $force_lang);
+
+  # Determine destination language before doing translation.
+  my $dest_lang;
+  if($force_lang) {
+    $dest_lang = $force_lang;
+  }
+  elsif (Irssi::settings_get_int("gtrans_output_auto") eq 1) {
+    # Semiauto translation. Here we preprocess the msg to determine
+    # destination language. The WGL API cannot fetch the list of valid
+    # languages, so we simply try to see if the language is valid.
+    if ( $msg =~ /^([a-z]{2}(-[a-z]{2})?):(.*)/i) {
+      dbg(2, "$subname() dest_lang \"$1\", msg \"$3\"");
+      $dest_lang = $1;
+      $msg = $3;
+    }
+  }
+  elsif (Irssi::settings_get_int("gtrans_output_auto") eq 2) {
+    # Fully automated translation.
+    # To avoid accidents, verify that $witem->{name} is whitelisted.
+    dbg(3, "$subname() Looking for target \"" .
+           $witem->{name} . "\" in whitelist");
+
+    my $do_translation = 0;
+    foreach (split(/ /,
+        Irssi::settings_get_str("gtrans_whitelist"))) {
+      $do_translation = 1 if ($witem->{name} eq $_);
+      $do_translation = 1 if ($_ eq "*");
+    }
+
+    unless ($do_translation) {
+      dbg(1, sprintf "Target \"" . $witem->{name} . "\" is " .
+                     "not whitelisted");
+      return;
+    }
+
+    dbg(2, sprintf "$subname() Target \"" . $witem->{name} .
+                   "\" is whitelisted");
+    $dest_lang = Irssi::settings_get_str("gtrans_output_auto_lang");
+  }
+
+  unless ($dest_lang and $msg) {
+    dbg(1, "Empty destination language or message");
+    return;
+  }
+
+  # Prepare arguments for translation.
+  utf8::decode($msg);
+  my %args = (
+    "func" => sub { $service->translate(@_) },
+    "text" => $msg,
+    "dest" => $dest_lang
+  );
+
+  # Run translation.
+  my $result = wgl_process(%args);
+
+  dbg(4, "$subname() wgl_process() output: " .
+         Dumper(\$result));
+
+  if ($result->error) {
+    dbg(1, "$subname() Translation failed");
+    err(sprintf "Translation failed with code %s: %s",
+        $result->code, $result->message);
+    return;
+  }
+
+  my $trmsg;
+  if ($result->language ne $dest_lang) {
+    $trmsg = $result->translation;
+    utf8::decode($trmsg);
+    dehtml($trmsg);
+  }
+
+  if($force_lang) {
+    # Emit new signal, since we came from cmd_gtrans().
+    $glob_cmdpass = 1; # Don't translate in event_output_msg()
+    dbg(3, "$subname():" . __LINE__ .
+           " Emitting \"send text\" signal");
+    Irssi::signal_emit("send text", $trmsg, $server, $witem);
+    return;
+  }
+
+  Irssi::signal_continue($trmsg, $server, $witem);
+
+  if (Irssi::settings_get_bool("gtrans_show_orig")) {
+    my $origmsg = sprintf "[orig:%%B%s%%n] %s",
+        $result->language,
+        $msg;
+    $witem->print($origmsg, MSGLEVEL_CLIENTCRAP);
+  }
+
+  dbg(1, "Outbound auto-translation successful");
+}
+
+# FIXME: While topic translation is implemented, it needs more work to
+# be useful. Until it is, the code is not active.
+#sub event_topic {
+#  # signal "message own_public" parameters:
+#  # my ($server, $channel, $topic, $nick, $target) = @_;
+#
+#  return unless Irssi::settings_get_bool("gtrans_topic_auto");
+#
+#  dbg(5, "event_topic() args: " . Dumper(\@_));
+#
+#  my ($server, $channel, $msg, $nick, $target) = @_;
+#
+#  my $do_translation = 0;
+#
+#  # Check whether $channel is in the whitelist.
+#  dbg(3, "event_topic() Looking for channel \"$channel\" in " .
+#         "whitelist");
+#  foreach (split(/ /,
+#      Irssi::settings_get_str("gtrans_whitelist"))) {
+#    $do_translation = 1 if ($channel eq $_);
+#    $do_translation = 1 if ($_ eq "*");
+#  }
+#
+#  unless ($do_translation) {
+#    dbg(1, sprintf "Channel $channel is not whitelisted. " .
+#                   "Not translating topic");
+#    return;
+#  }
+#
+#  dbg(2, sprintf "event_topic() Channel $channel is whitelisted");
+#
+#  # Prepare arguments for language detection.
+#  utf8::decode($msg);
+#  my %args = (
+#    "func" => sub { $service->detect(@_) },
+#    "text" => $msg,
+#  );
+#
+#  # Run language detection.
+#  my $result = wgl_process(%args);
+#
+#  dbg(4, "event_topic() wgl_process() detect returned: " .
+#         Dumper(\$result));
+#
+#  if ($result->error) {
+#    dbg(1, "event_topic(): Language detection failed");
+#    err(sprintf "Language detection failed with code %s: %s",
+#        $result->code, $result->message);
+#    return;
+#  }
+#
+#  # Don't translate my languages.
+#  foreach (split(/ /, Irssi::settings_get_str("gtrans_my_lang"))) {
+#    $do_translation = 0 if($result->language eq $_);
+#  }
+#
+#  unless ($do_translation) {
+#    dbg(2, "event_topic() Incoming language " .
+#           "\"$result->language\" matches my lang(s). " .
+#           "Not translating.");
+#    return;
+#  }
+#
+#  dbg(1, sprintf "Detected language \"%s\", confidence %.3f",
+#                 $result->language, $result->confidence);
+#
+#  my $confidence = $result->confidence;
+#
+#  # Prepare arguments for translation.
+#  my %args = (
+#    "func" => sub { $service->translate(@_) },
+#    "text" => $msg,
+#    "dest" => (split(/ /,
+#        Irssi::settings_get_str("gtrans_my_lang")))[0]
+#  );
+#
+#  # Run translation.
+#  my $result = wgl_process(%args);
+#
+#  dbg(4, "event_topic() wgl_process() translate returned: " .
+#         Dumper(\$result));
+#
+#  if ($result->error) {
+#    dbg(1, "Topic translation failed");
+#    err(sprintf "Topic translation failed with code %s: %s",
+#        $result->code, $result->message);
+#    return;
+#  }
+#
+#  # FIXME: Don't alter messages!
+#  $msg = sprintf "[%s:%.2f] %s",
+#      $result->language, $confidence, $result->translation;
+#
+#  utf8::decode($msg);
+#  dehtml($msg);
+#
+#  # FIXME: More info about result?
+#  dbg(1, "Incoming topic translation successful");
+#
+#  Irssi::signal_continue($server, $channel, $msg, $nick, $target);
+#}
+
+sub cmd_gtrans {
+  my $subname = "cmd_gtrans";
+  my ($msg, $server, $witem) = @_;
+
+  dbg(5, "$subname() input: " . Dumper(\@_));
+
+  if ($msg =~ /^(|help|-h|--help|-t|--test)$/) {
+    usage();
+    return;
+  }
+
+  my $testing_mode = 0;
+  if ($msg =~ /^(-t|--test) /) {
+    $testing_mode = 1;
+    $msg =~ s/^(-t|--test) //;
+  }
+
+  return unless ($testing_mode or
<<Diff was trimmed, longer than 597 lines>>


More information about the pld-cvs-commit mailing list