html/gtrans.pl
1 #!/usr/bin/env perl -w
2 # vim: set sw=2 ts=2 sta et:
3
4 # GTrans: Automatic translation in Irssi using the Google Language API
5 # by Sven Ulland <svensven@gmail.com>. License: GPLv2
6 #
7 # DOCUMENTATION
8 # -------------
9 # Introduction:
10 # This script brings the power of the Google Language API to Irssi.
11 # In short, it provides a quick way to translate incoming and
12 # outgoing IRC messages with minimal effort. While the result is
13 # far from professional quality, it is vastly superior to most other
14 # automatic translation engines.
15 #
16 # Prerequisites:
17 # Better results are achieved if you write properly.
18 #
19 # Only UTF-8 text is supported. Make sure your terminal handles it.
20 #
21 # The WebService::Google::Language Perl module is required for the
22 # script to work. It is unlikely that your system provides binary
23 # packages for this module, so you probably have to install it
24 # manually or through the CPAN shell:
25 #
26 # $ perl -MCPAN -e "install WebService::Google::Language"
27 #
28 # Quick testing:
29 # To quickly test the script to see what it can do, you can run the
30 # following command after starting Irssi and loading the script. It
31 # will translate the text and display the result in the current
32 # window. No text will be sent to IRC.
33 #
34 # /gtrans --test fi:this is a small test
35 #
36 # Another example to translate text and send it to the target
37 # (channel or query) in the currently active window:
38 #
39 # /gtrans fi:hello! this is a small test
40 #
41 # Normal operation:
42 # When loaded with default settings, the script does nothing. The
43 # reason for this is to maintain privacy: It is not a good idea to
44 # submit potentially sensitive information directly to Google.
45 #
46 # Automatic translation requires that the channel or nick that sends
47 # or receives the message, is in a whitelist. The following scenario
48 # will enable automatic translation for the channel #mychan and nick
49 # 'james':
50 #
51 # /set gtrans_my_lang en
52 # /set gtrans_input_auto ON
53 # /set gtrans_output_auto 2
54 # /set gtrans_output_auto_lang fi
55 # /set gtrans_whitelist #mychan james
56 #
57 # Incoming or outgoing messages on the #mychan channel and queries
58 # from/to james will now be automatically translated: Incoming
59 # messages will be translated from any language to English; outgoing
60 # messages will be translated from any language to Finnish.
61 #
62 # Settings:
63 # The available settings are described below. The default value is
64 # shown in parentheses.
65 #
66 # gtrans_input_auto (ON)
67 # ON: Translate incoming messages that match gtrans_whitelist.
68 # Translate to the language specified by gtrans_my_lang.
69 # OFF: Don't translate incoming messages.
70 #
71 # gtrans_show_orig (ON)
72 # ON: Show the original, untranslated message, and display the
73 # translation on the next line. Applies to both incoming and
74 # outgoing messages.
75 # OFF: Translate messages transparently, hide original text.
76 #
77 # gtrans_output_auto (1)
78 # 0: Don't translate outgoing messages.
79 # 1: Translate outgoing messages only when the text is prefixed
80 # by "<lang>:". Example: fi:this is a small test. This will
81 # override the whitelist.
82 # 2: Translate outgoing messages automatically to the language
83 # specified by gtrans_output_auto_lang. Target has to match
84 # the whitelist.
85 #
86 # gtrans_output_auto_lang ("fi")
87 # xx: Set automatic output language to "xx". This applies to
88 # automatically translated outgoing messages when
89 # gtrans_output_auto is set to 2.
90 #
91 # gtrans_my_lang ("en")
92 # xx: Space-separated list of languages that should not be
93 # translated. Incoming messages will be translated to the
94 # first language in this list. Note: The language will be
95 # detected by sending the message to the Google API.
96 #
97 # gtrans_debug (0)
98 # 0: No debugging.
99 # 1: Light debugging. Useful to see what's going on.
100 # 2: Normal debugging. Slightly more verbose.
101 # 3: Medium debugging. Useful for troubleshooting.
102 # 4: Verbose debugging. Significant output.
103 # 5: Very verbose debugging. Lots of output.
104 #
105 # gtrans_whitelist ("")
106 # xx: Space-separated list of channels and nicks that can be
107 # translated. This applies to both incoming and outgoing
108 # messages. Specify "*" to whitelist everything.
109 #
110 # Links / more info:
111 # List of supported languages in the Google Language API:
112 # <URL:http://code.google.com/apis/ajaxlanguage/documentation/reference.html#LangNameArray>
113 #
114 # WebService::Google::Language Perl module at CPAN:
115 # <URL:http://search.cpan.org/~hma/WebService-Google-Language-0.02/lib/WebService/Google/Language.pm>
116 #
117 # TODO list:
118 # * What determines the value of isreliable? The API doesn't say.
119 # * Translate incoming/outgoing notices.
120 # * Translate incoming/outgoing topics.
121 # + Keep un-/translated topic in topic bar with a toggle.
122 # * Make debugging levels and messages more consistent.
123 # * Make whitelist work with servers/connections too.
124 # * Interact better with logging.
125 # * Better code reuse. Lots of duplication now.
126 # * Verify compatibility with other scripts/themes/configurations.
127 #
128
use strict;

use Irssi;
use Data::Dumper qw(Dumper);
use WebService::Google::Language;

# Script metadata. Both variables are package globals.
our $VERSION = "0.0.1";
our %IRSSI = (
  authors     => "Sven Ulland",
  contact     => "svensven\@gmail.com",
  name        => "GTrans",
  description => "Translation via the Google Language API",
  license     => "GPLv2",
  url         => "http://scripts.irssi.org/",
  changed     => $VERSION,
  modules     => "WebService::Google::Language",
  commands    => "gtrans"
);

# Shared client object used for every detect/translate request. The
# source and destination languages are left empty here; each request
# supplies its own.
my $service = WebService::Google::Language->new(
  referer => "http://scripts.irssi.org/",
  agent   => "$IRSSI{name} $VERSION for Irssi",
  timeout => 5,
  src     => "",
  dest    => "",
);

# Urgh. $glob_cmdpass is set to 1 by event_output_msg() when it was
# called from cmd_gtrans() and is about to emit "send text" itself. It
# is checked at the top of event_output_msg(), because that sub then
# runs a second time as the handler for the emitted "send text" signal
# and must not translate the already translated text again.
my $glob_cmdpass = 0;
162
# dbg($level, $msg)
#
# Print the debug message $msg when $level does not exceed the value of
# the gtrans_debug setting. The level number is shown in a
# level-specific colour (levels 1 to 5 have a colour assigned).
sub dbg {
  my ($level, $msg) = @_;

  my $threshold = Irssi::settings_get_int("gtrans_debug");
  return if ($level > $threshold);

  # Irssi colour code for each debug level.
  my %colour_for;
  @colour_for{1 .. 5} = qw(%G %Y %C %M %R);

  my $tag = "$colour_for{$level}$level";
  print CLIENTCRAP "%W$IRSSI{name} %Bdebug%W(" . $tag . "%W)>%n $msg";
}
178
# err($msg)
#
# Print $msg to the client, prefixed with the script name and a red
# "error" tag.
sub err {
  my ($msg) = @_;
  my $prefix = "%W$IRSSI{name} %Rerror%W>%n ";
  print CLIENTCRAP $prefix . $msg;
}
183
# inf($msg)
#
# Print $msg to the client, prefixed with the script name and a green
# "info" tag.
sub inf {
  my ($msg) = @_;
  my $prefix = "%W$IRSSI{name} %Ginfo%W>%n ";
  print CLIENTCRAP $prefix . $msg;
}
188
# usage()
#
# Print the command synopsis, two examples and a pointer to the
# settings. Every line is prefixed with the script name and a yellow
# "usage" tag.
sub usage {
  my $prefix = "%W$IRSSI{name} %Yusage%W>%n ";
  my $cmd    = $IRSSI{commands};

  my @lines = (
    "/$cmd [-t|--test] <lang>:<message>",

    "Example: %W/$cmd fr:this message " .
      "will be translated to french and sent to the " .
      "currently active window.%n",

    "Example: %W/$cmd -t fi:this " .
      "message will be translated to finnish, but " .
      "*won't* be sent out. use this to test " .
      "translations.%n",

    "There are several settings to modify " .
      "translation behaviour. Type %W/set gtrans%n to " .
      "see the available settings. See the script " .
      "source for documentation.",
  );

  foreach my $line (@lines) {
    print CLIENTCRAP $prefix . $line;
  }
}
207
# dehtml($text)
#
# Decode HTML entities in $text IN PLACE: the caller's variable is
# modified through the @_ alias, which is how every caller in this file
# uses it. Handles decimal numeric references (such as &#39;) and the
# five predefined named entities. Everything is decoded in a single
# pass, so text such as "&amp;lt;" becomes "&lt;" and is not decoded
# twice. Does nothing when $text is undefined.
#
# FIXME: The only HTML entity actually seen in API output so far is
# &#39;. The other entities are handled as a precaution.
sub dehtml {
  return unless defined $_[0];

  my %char_for = (
    quot => '"',
    amp  => '&',
    lt   => '<',
    gt   => '>',
    apos => "'",
  );

  $_[0] =~ s/&(?:#([0-9]+)|(quot|amp|lt|gt|apos));/
             defined $1 ? chr($1) : $char_for{$2}/gex;
}
212
# wgl_process(func => CODE, ...)
#
# Call the code reference stored under the "func" key, handing it the
# complete argument list (including the "func" pair itself), and return
# whatever it returns. If the returned object's error() is true, the
# code and message are reported through err(). The result is returned
# to the caller in either case.
sub wgl_process {
  my %args = @_;
  dbg(5, "wgl_process(): input %args: " . Dumper(\%args));

  my $callback = $args{func};
  my $result   = $callback->(%args);
  dbg(4, "wgl_process() wgl_func() output: " . Dumper(\$result));

  return $result unless $result->error;

  my @details = ($result->code, $result->message);
  err(sprintf "wgl_process() wgl_func() code %s: %s", @details);

  return $result;
}
230
# event_input_msg($server, $msg, $nick, $address, $target)
#
# Signal handler registered for "message public" and "message private".
#
# When gtrans_input_auto is enabled and the sender (private message) or
# the channel (any other signal) matches gtrans_whitelist, the language
# of $msg is detected. Unless that language is listed in gtrans_my_lang,
# the text is translated to the first language in gtrans_my_lang.
#
# With gtrans_show_orig ON the signal is continued with the original
# text and the translation is printed on a separate line. With it OFF
# the signal is continued with the translation in place of the text.
#
# On every early return (not enabled, not whitelisted, own language,
# API error) the handler does not touch the signal.
sub event_input_msg {
  my $subname = "event_input_msg";
  my ($server, $msg, $nick, $address, $target) = @_;

  return unless Irssi::settings_get_bool("gtrans_input_auto");

  dbg(5, "$subname() args: " . Dumper(\@_));

  # Private messages are matched against the sender's nick, everything
  # else against the target (channel) name.
  my $is_private = (Irssi::signal_get_emitted() eq "message private");
  my $source     = $is_private ? $nick : $target;
  my $witem      = Irssi::window_item_find($source);

  # $target may be undefined (presumably for private messages - confirm),
  # so use an empty string in the log text.
  my $target_label = defined $target ? $target : "";

  dbg(3, "$subname() Looking for " .
         ($is_private ? "nick" : "channel") .
         " \"$source\" in whitelist");

  my @whitelist = split(/ /, Irssi::settings_get_str("gtrans_whitelist"));
  my $whitelisted = grep { $_ eq "*" or $_ eq $source } @whitelist;

  # No sprintf here: the text contains channel/nick names, which must
  # never be used as a format string.
  unless ($whitelisted) {
    dbg(1, "Channel (\"$target_label\") or nick (\"$nick\") is " .
           "not whitelisted");
    return;
  }

  dbg(2, "$subname() Channel (\"$target_label\") or nick " .
         "(\"$nick\") is whitelisted");

  # Run language detection.
  utf8::decode($msg);
  my $detected = wgl_process(
    "func" => sub { $service->detect(@_) },
    "text" => $msg,
  );

  dbg(4, "$subname() wgl_process() detect returned: " .
         Dumper(\$detected));

  if ($detected->error) {
    dbg(1, "$subname(): Language detection failed");
    err(sprintf "Language detection failed with code %s: %s",
        $detected->code, $detected->message);
    return;
  }

  my $src_lang   = $detected->language;
  my $confidence = $detected->confidence;
  my $reliable   = $detected->is_reliable;

  # Don't translate my languages.
  my @my_langs = split(/ /, Irssi::settings_get_str("gtrans_my_lang"));
  if (grep { $src_lang eq $_ } @my_langs) {
    # Concatenate: a method call is not interpolated inside a string.
    dbg(2, "$subname() Incoming language \"" . $src_lang .
           "\" matches my lang(s). Not translating.");
    return;
  }

  dbg(1, sprintf "Detected language \"%s\", confidence %.3f",
         $src_lang, $confidence);

  # Incoming messages are translated to the first of my languages.
  my $dest_lang = $my_langs[0];
  unless ($dest_lang) {
    err("gtrans_my_lang is empty, no language to translate to");
    return;
  }

  # Run translation.
  my $translated = wgl_process(
    "func" => sub { $service->translate(@_) },
    "text" => $msg,
    "dest" => $dest_lang,
  );

  dbg(4, "$subname() wgl_process() translate returned: " .
         Dumper(\$translated));

  if ($translated->error) {
    dbg(1, "Translation failed");
    err(sprintf "Translation failed with code %s: %s",
        $translated->code, $translated->message);
    return;
  }

  if (Irssi::settings_get_bool("gtrans_show_orig")) {
    # Confidence is coloured green when the detection was flagged as
    # reliable, red otherwise.
    my $trmsg = sprintf "[%%B%s%%n:%s%.2f%%n] %s",
                        $translated->language,
                        $reliable ? "%g" : "%r",
                        $confidence,
                        $translated->translation;
    utf8::decode($trmsg);
    dehtml($trmsg);

    Irssi::signal_continue($server, $msg, $nick, $address, $target);

    # There may be no window item for the source (for example no query
    # window); fall back to the active window instead of dying.
    my $out = defined $witem ? $witem : Irssi::active_win();
    $out->print($trmsg, MSGLEVEL_CLIENTCRAP);
  }
  else {
    $msg = sprintf "[%s:%.2f] %s",
                   $translated->language,
                   $confidence,
                   $translated->translation;
    utf8::decode($msg);
    dehtml($msg);

    Irssi::signal_continue($server, $msg, $nick, $address, $target);
  }

  dbg(1, "Incoming translation successful");
}
360
# event_output_msg($msg, $server, $witem, $force_lang)
#
# Handler for the "send text" signal. It is also called directly by
# cmd_gtrans(), which passes the destination language as $force_lang
# (the signal itself supplies only the first three arguments).
#
# The destination language is chosen as follows:
#   * $force_lang, if given;
#   * gtrans_output_auto == 1: a "<lang>:" prefix in $msg, which is
#     stripped from the text;
#   * gtrans_output_auto == 2: gtrans_output_auto_lang, provided that
#     the name of $witem matches gtrans_whitelist.
#
# The text is then translated. If the API reports that the text already
# is in the destination language, the (prefix-stripped) text is sent
# as-is. When called with $force_lang the result is sent by emitting a
# new "send text" signal; otherwise the current signal is continued with
# the new text and, if gtrans_show_orig is ON, the original is printed.
#
# On every early return the handler does not touch the signal.
sub event_output_msg {
  my $subname = "event_output_msg";
  my ($msg, $server, $witem, $force_lang) = @_;

  dbg(5, "$subname() args: " . Dumper(\@_));

  # Safeguard to stop double translations when using /gtrans: this
  # invocation was triggered by our own signal_emit() below.
  if ($glob_cmdpass) {
    $glob_cmdpass = 0;
    Irssi::signal_continue($msg, $server, $witem);
    return;
  }

  my $mode = Irssi::settings_get_int("gtrans_output_auto");
  return unless ($force_lang or $mode == 1 or $mode == 2);

  # Determine destination language before doing translation.
  my $dest_lang;
  if ($force_lang) {
    $dest_lang = $force_lang;
  }
  elsif ($mode == 1) {
    # Semiauto translation. Here we preprocess the msg to determine
    # destination language. The WGL API cannot fetch the list of valid
    # languages, so we simply try to see if the language is valid.
    if ($msg =~ /^([a-z]{2}(-[a-z]{2})?):(.*)/i) {
      dbg(2, "$subname() dest_lang \"$1\", msg \"$3\"");
      $dest_lang = $1;
      $msg = $3;
    }
  }
  else {
    # Fully automated translation.
    # To avoid accidents, verify that the target is whitelisted.
    my $target_name = $witem ? $witem->{name} : undef;
    unless (defined $target_name) {
      dbg(1, "$subname() No target window item, not translating");
      return;
    }

    dbg(3, "$subname() Looking for target \"" .
           $target_name . "\" in whitelist");

    my @whitelist =
      split(/ /, Irssi::settings_get_str("gtrans_whitelist"));

    # No sprintf on these messages: they contain the target name, which
    # must never be used as a format string.
    unless (grep { $_ eq "*" or $_ eq $target_name } @whitelist) {
      dbg(1, "Target \"" . $target_name . "\" is not whitelisted");
      return;
    }

    dbg(2, "$subname() Target \"" . $target_name .
           "\" is whitelisted");
    $dest_lang = Irssi::settings_get_str("gtrans_output_auto_lang");
  }

  unless ($dest_lang and $msg) {
    dbg(1, "Empty destination language or message");
    return;
  }

  # Run translation.
  utf8::decode($msg);
  my $result = wgl_process(
    "func" => sub { $service->translate(@_) },
    "text" => $msg,
    "dest" => $dest_lang,
  );

  dbg(4, "$subname() wgl_process() output: " .
         Dumper(\$result));

  if ($result->error) {
    dbg(1, "$subname() Translation failed");
    err(sprintf "Translation failed with code %s: %s",
        $result->code, $result->message);
    return;
  }

  # Use the translation unless the text already is in the destination
  # language. In that case, or if the API returned no text, send the
  # message itself so that it is never replaced by an empty value.
  my $src_lang   = $result->language;
  my $same_lang  = (defined $src_lang and
                    lc($src_lang) eq lc($dest_lang));
  my $trmsg      = $msg;
  my $translated = 0;
  unless ($same_lang) {
    my $text = $result->translation;
    if (defined $text and length $text) {
      utf8::decode($text);
      dehtml($text);
      $trmsg = $text;
      $translated = 1;
    }
  }

  if ($force_lang) {
    # Emit new signal, since we came from cmd_gtrans().
    $glob_cmdpass = 1; # Don't translate in event_output_msg()
    dbg(3, "$subname():" . __LINE__ .
           " Emitting \"send text\" signal");
    Irssi::signal_emit("send text", $trmsg, $server, $witem);

    # Make sure the flag cannot leak into the next, unrelated message
    # in case the emitted signal never reached this handler.
    $glob_cmdpass = 0;
    return;
  }

  Irssi::signal_continue($trmsg, $server, $witem);

  # Showing the original only makes sense when the text was changed and
  # there is a window item to print to.
  if ($translated and $witem and
      Irssi::settings_get_bool("gtrans_show_orig")) {
    my $origmsg = sprintf "[orig:%%B%s%%n] %s",
                          $src_lang,
                          $msg;
    $witem->print($origmsg, MSGLEVEL_CLIENTCRAP);
  }

  dbg(1, "Outbound auto-translation successful");
}
471
472 # FIXME: While topic translation is implemented, it needs more work to
473 # be useful. Until it is, the code is not active.
474 #sub event_topic {
475 # # signal "message own_public" parameters:
476 # # my ($server, $channel, $topic, $nick, $target) = @_;
477 #
478 # return unless Irssi::settings_get_bool("gtrans_topic_auto");
479 #
480 # dbg(5, "event_topic() args: " . Dumper(\@_));
481 #
482 # my ($server, $channel, $msg, $nick, $target) = @_;
483 #
484 # my $do_translation = 0;
485 #
486 # # Check whether $channel is in the whitelist.
487 # dbg(3, "event_topic() Looking for channel \"$channel\" in " .
488 # "whitelist");
489 # foreach (split(/ /,
490 # Irssi::settings_get_str("gtrans_whitelist"))) {
491 # $do_translation = 1 if ($channel eq $_);
492 # $do_translation = 1 if ($_ eq "*");
493 # }
494 #
495 # unless ($do_translation) {
496 # dbg(1, sprintf "Channel $channel is not whitelisted. " .
497 # "Not translating topic");
498 # return;
499 # }
500 #
501 # dbg(2, sprintf "event_topic() Channel $channel is whitelisted");
502 #
503 # # Prepare arguments for language detection.
504 # utf8::decode($msg);
505 # my %args = (
506 # "func" => sub { $service->detect(@_) },
507 # "text" => $msg,
508 # );
509 #
510 # # Run language detection.
511 # my $result = wgl_process(%args);
512 #
513 # dbg(4, "event_topic() wgl_process() detect returned: " .
514 # Dumper(\$result));
515 #
516 # if ($result->error) {
517 # dbg(1, "event_topic(): Language detection failed");
518 # err(sprintf "Language detection failed with code %s: %s",
519 # $result->code, $result->message);
520 # return;
521 # }
522 #
523 # # Don't translate my languages.
524 # foreach (split(/ /, Irssi::settings_get_str("gtrans_my_lang"))) {
525 # $do_translation = 0 if($result->language eq $_);
526 # }
527 #
528 # unless ($do_translation) {
529 # dbg(2, "event_topic() Incoming language " .
530 # "\"$result->language\" matches my lang(s). " .
531 # "Not translating.");
532 # return;
533 # }
534 #
535 # dbg(1, sprintf "Detected language \"%s\", confidence %.3f",
536 # $result->language, $result->confidence);
537 #
538 # my $confidence = $result->confidence;
539 #
540 # # Prepare arguments for translation.
541 # my %args = (
542 # "func" => sub { $service->translate(@_) },
543 # "text" => $msg,
544 # "dest" => (split(/ /,
545 # Irssi::settings_get_str("gtrans_my_lang")))[0]
546 # );
547 #
548 # # Run translation.
549 # my $result = wgl_process(%args);
550 #
551 # dbg(4, "event_topic() wgl_process() translate returned: " .
552 # Dumper(\$result));
553 #
554 # if ($result->error) {
555 # dbg(1, "Topic translation failed");
556 # err(sprintf "Topic translation failed with code %s: %s",
557 # $result->code, $result->message);
558 # return;
559 # }
560 #
561 # # FIXME: Don't alter messages!
562 # $msg = sprintf "[%s:%.2f] %s",
563 # $result->language, $confidence, $result->translation;
564 #
565 # utf8::decode($msg);
566 # dehtml($msg);
567 #
568 # # FIXME: More info about result?
569 # dbg(1, "Incoming topic translation successful");
570 #
571 # Irssi::signal_continue($server, $channel, $msg, $nick, $target);
572 #}
573
# cmd_gtrans($msg, $server, $witem)
#
# Handler for the /gtrans command.
#
#   /gtrans [-t|--test] <lang>:<message>
#
# <lang> is a two-letter language code, optionally followed by a
# two-letter suffix ("xx" or "xx-yy"), the same form that
# event_output_msg() accepts as a message prefix.
#
# In test mode (-t / --test) the message is translated and the result
# is only printed in the active window. Otherwise the message is handed
# to event_output_msg() with the language forced, which translates it
# and sends it to the channel or query in $witem.
#
# Prints the usage text when called without arguments, with a help
# option, or with a message that cannot be parsed.
sub cmd_gtrans {
  my $subname = "cmd_gtrans";
  my ($msg, $server, $witem) = @_;

  dbg(5, "$subname() input: " . Dumper(\@_));

  if ($msg =~ /^(|help|-h|--help|-t|--test)$/) {
    usage();
    return;
  }

  my $testing_mode = 0;
  if ($msg =~ s/^(-t|--test) //) {
    $testing_mode = 1;
  }

  # Sending needs a channel or query; testing works anywhere.
  my $can_send = ($witem and
                  ($witem->{type} eq "CHANNEL" or
                   $witem->{type} eq "QUERY"));
  unless ($testing_mode or $can_send) {
    err("No channel or query in the active window to send to");
    return;
  }

  # Determine destination language before doing translation.
  my $dest_lang;
  if ($msg =~ /^([a-z]{2}(?:-[a-z]{2})?):(.*)/i) {
    dbg(2, "$subname() dest_lang \"$1\", msg \"$2\"");
    $dest_lang = $1;
    $msg = $2;
  }
  else {
    dbg(2, "$subname() syntax error");
  }

  unless ($dest_lang and $msg) {
    err("Empty destination language or message");
    usage();
    return;
  }

  unless ($testing_mode) {
    event_output_msg($msg, $server, $witem, $dest_lang);
    return;
  }

  # Testing mode: translate here and only display the result.
  utf8::decode($msg);
  my $result = wgl_process(
    "func" => sub { $service->translate(@_) },
    "text" => $msg,
    "dest" => $dest_lang,
  );

  dbg(4, "$subname() wgl_process() output: " . Dumper(\$result));

  if ($result->error) {
    dbg(1, "$subname(): Translation failed");
    err(sprintf "Translation failed with code %s: %s",
        $result->code, $result->message);
    return;
  }

  $msg = $result->translation;
  utf8::decode($msg);
  dehtml($msg);

  dbg(1, "Outbound translation successful");

  my $win = Irssi::active_win();
  $win->print(sprintf("%%GGTrans test (%%B%s%%n->%%B%s%%G):%%n %s",
                      $result->language,
                      $dest_lang,
                      $msg), MSGLEVEL_CLIENTCRAP);
}
652
# Announce the script and point at the built-in help.
print CLIENTCRAP "%W$IRSSI{name} loaded. Hints: %n/$IRSSI{commands} help";

# Register gtrans settings (all in the "gtrans" section) with their
# default values.
my $section = "gtrans";
Irssi::settings_add_bool($section, "gtrans_input_auto",       1);
#Irssi::settings_add_bool($section, "gtrans_topic_auto",       0);
Irssi::settings_add_bool($section, "gtrans_show_orig",        1);
Irssi::settings_add_int ($section, "gtrans_output_auto",      1);
Irssi::settings_add_str ($section, "gtrans_output_auto_lang", "fi");
Irssi::settings_add_str ($section, "gtrans_my_lang",          "en");
Irssi::settings_add_int ($section, "gtrans_debug",            0);
Irssi::settings_add_str ($section, "gtrans_whitelist",        "");

# Register /gtrans command.
Irssi::command_bind("gtrans", "cmd_gtrans");

# Register events for incoming messages/actions.
foreach my $signal ("message public", "message private") {
  Irssi::signal_add_last($signal, "event_input_msg");
}

# Register events for outgoing messages/actions.
Irssi::signal_add("send text", "event_output_msg");

#TODO: Register events that need special handling.
#Irssi::signal_add("message irc action", "event_input_msg");
#Irssi::signal_add("message irc notice", "event_input_msg");
#Irssi::signal_add("message irc own_action", "event_output_msg");
#Irssi::signal_add("message irc own_notice", "event_output_msg");
#Irssi::signal_add("event topic", "event_topic");