html/romajibind.pl


   1 #!/usr/bin/perl -w
   2 #
   3 # Copyright (c) 2002 Victor Ivanov <v0rbiz@yahoo.com>
   4 # All rights reserved.
   5 #
   6 # Redistribution and use in source and binary forms, with or without
   7 # modification, are permitted provided that the following conditions
   8 # are met:
   9 # 1. Redistributions of source code must retain the above copyright
  10 #    notice, this list of conditions and the following disclaimer.
  11 # 2. Redistributions in binary form must reproduce the above copyright
  12 #    notice, this list of conditions and the following disclaimer in the
  13 #    documentation and/or other materials provided with the distribution.
  14 #
  15 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25 # SUCH DAMAGE.
  26 #
  27 
  28 use strict;
  29 use vars qw($VERSION %IRSSI);
  30 $VERSION = "1.0b";
  31 %IRSSI = (
  32     authors	=> 'Victor Ivanov',
  33     contact	=> 'v0rbiz@yahoo.com',
  34     name	=> 'romajibind',
  35     description	=> 'Dynamic romaji binds',
  36     license	=> 'BSD 2-clause',
  37     url		=> 'http://irssi.org/scripts/'
  38 );
  39 
  40 # Some help...
# First, this is a UTF-8 script.
  42 # Press ctrl-R to switch between Hiragana, Katakana and English input
  43 #
# While the script is loading, it installs a huge number of
# second-level binds. This takes some time.
  46 #
# When you press ctrl-R it installs some more binds, but not the whole
# bunch. Still, it takes a noticeable amount of time. If you want
# something faster, try the simple romaji.pl :)
  50 #
  51 # The system is mostly Hepburn, but it could have some kunrei mappings also.
  52 #
# Because of irssi's bind limits, the small tsu is not inserted automatically
# as it is in romaji.pl. You need to type it explicitly, using 'tt'.
# The same goes for ん and ン, which are typed with nn or mm.
  56 #
  57 # There is a statusbar item which shows a glyph for the current mapping.
  58 # [英]語 -> [平]仮名 -> [片]仮名
  59 #
  60 # If you want it, type
  61 # /statusbar window add ro1_sb
  62 # (just once, it will remember it)
  63 
  64 use Irssi;
  65 use Irssi::TextUI;
  66 
  67 # Meow
  68 # These are almost the same as in romaji.pl
  69 
  70 my(%hira) = (
  71     "a"   => "あ", "i"   => "い", "u"   => "う", "e"   => "え", "o"   => "お",
  72     "ka"  => "か", "ki"  => "き", "ku"  => "く", "ke"  => "け", "ko"  => "こ",
  73     "sa"  => "さ", "shi" => "し", "su"  => "す", "se"  => "せ", "so"  => "そ",
  74     "ta"  => "た", "chi" => "ち", "tsu" => "つ", "te"  => "て", "to"  => "と",
  75     "na"  => "な", "ni"  => "に", "nu"  => "ぬ", "ne"  => "ね", "no"  => "の",
  76     "ha"  => "は", "hi"  => "ひ", "hu"  => "ふ", "he"  => "へ", "ho"  => "ほ", "fu"  => "ふ",
  77     "ma"  => "ま", "mi"  => "み", "mu"  => "む", "me"  => "め", "mo"  => "も",
  78     "ya"  => "や", "yu"  => "ゆ", "yo"  => "よ",
  79     "ra"  => "ら", "ri"  => "り", "ru"  => "る", "re"  => "れ", "ro"  => "ろ",
  80     "wa"  => "わ", "wi"  => "ゐ", "we"  => "ゑ", "wo"  => "を",
  81     "nn"  => "ã‚“",
  82     "mm"  => "ã‚“",
  83 
  84     "ga"  => "が", "gi"  => "ぎ", "gu"  => "ぐ", "ge"  => "げ", "go"  => "ご",
  85     "za"  => "ざ", "ji"  => "じ", "zu"  => "ず", "ze"  => "ぜ", "zo"  => "ぞ",
  86     "da"  => "だ", "dzi" => "ぢ", "dzu" => "づ", "de"  => "で", "do"  => "ど",
  87     "ba"  => "ば", "bi"  => "び", "bu"  => "ぶ", "be"  => "べ", "bo"  => "ぼ",
  88     "pa"  => "ぱ", "pi"  => "ぴ", "pu"  => "ぷ", "pe"  => "ぺ", "po"  => "ぽ",
  89 
  90     "fa"  => "ふぁ", "fi"  => "ふぃ", "fe"  => "ふぇ", "fo"  => "ふぉ",
  91     "di"  => "でぃ",
  92 
  93     "kya" => "きゃ", "kyu" => "きゅ", "kyo" => "きょ",
  94     "sha" => "しゃ", "shu" => "しゅ", "sho" => "しょ",
  95     "cha" => "ちゃ", "chu" => "ちゅ", "cho" => "ちょ",
  96     "nya" => "にゃ", "nyu" => "にゅ", "nyo" => "にょ",
  97     "hya" => "ひゃ", "hyu" => "ひゅ", "hyo" => "ひょ",
  98     "mya" => "みゃ", "myu" => "みゅ", "myo" => "みょ",
  99     "rya" => "りゃ", "ryu" => "りゅ", "ryo" => "りょ",
 100     "gya" => "ぎゃ", "gyu" => "ぎゅ", "gyo" => "ぎょ",
 101     "ja"  => "じゃ", "ju"  => "じゅ", "jo"  => "じょ",
 102     "jya" => "じゃ", "jyu" => "じゅ", "jyo" => "じょ",
 103     "dza" => "ぢゃ", "dju" => "ぢゅ", "dzo" => "ぢょ",
 104     "dja" => "ぢゃ",                  "djo" => "ぢょ",
 105     "bya" => "びゃ", "byu" => "びゅ", "byo" => "びょ",
 106     "pya" => "ぴゃ", "pyu" => "ぴゅ", "pyo" => "ぴょ",
 107 
 108     "tt"  => "っ"
 109 );
 110 
 111 my(%kata) = (
 112     "a"   => "ア", "i"   => "イ", "u"   => "ウ", "e"   => "エ", "o"   => "オ",
 113     "ka"  => "カ", "ki"  => "キ", "ku"  => "ク", "ke"  => "ケ", "ko"  => "コ",
 114     "sa"  => "サ", "shi" => "シ", "su"  => "ス", "se"  => "セ", "so"  => "ソ",
 115     "ta"  => "タ", "chi" => "チ", "tsu" => "ツ", "te"  => "テ", "to"  => "ト",
 116     "na"  => "ナ", "ni"  => "ニ", "nu"  => "ヌ", "ne"  => "ネ", "no"  => "ノ",
 117     "ha"  => "ハ", "hi"  => "ヒ", "hu"  => "フ", "he"  => "ヘ", "ho"  => "ホ", "fu"  => "フ",
 118     "ma"  => "マ", "mi"  => "ミ", "mu"  => "ム", "me"  => "メ", "mo"  => "モ",
 119     "ya"  => "ヤ", "yu"  => "ユ", "yo"  => "ヨ", "ye"  => "エ",
 120     "ra"  => "ラ", "ri"  => "リ", "ru"  => "ル", "re"  => "レ", "ro"  => "ロ",
 121     "wa"  => "ワ", "wi"  => "ヰ", "we"  => "ヱ", "wo"  => "ヲ",
 122     "nn"  => "ン",
 123     "mm"  => "ン",
 124 
 125     "ga"  => "ガ", "gi"  => "ギ", "gu"  => "グ", "ge"  => "ゲ", "go"  => "ゴ",
 126     "za"  => "ザ", "ji"  => "ジ", "zu"  => "ズ", "ze"  => "ゼ", "zo"  => "ゾ",
 127     "da"  => "ダ", "dzi" => "ヂ", "dzu" => "ヅ", "de"  => "デ", "do"  => "ド",
 128     "ba"  => "バ", "bi"  => "ビ", "bu"  => "ブ", "be"  => "ベ", "bo"  => "ボ",
 129     "pa"  => "パ", "pi"  => "ピ", "pu"  => "プ", "pe"  => "ペ", "po"  => "ポ",
 130 
 131     "va"  => "ヴァ", "vi"  => "ヴィ", "vu"  => "ヴ",   "ve"  => "ヴェ", "vo"  => "ヴォ",
 132     "fa"  => "ファ", "fi"  => "フィ", "fe"  => "フェ", "fo"  => "フォ",
 133     "di"  => "ディ",
 134 
 135     "dje" => "ヂェ", "dze" => "ヂェ",
 136 
 137     "kya" => "キャ", "kyu" => "キュ", "kyo" => "キョ",
 138     "sha" => "シャ", "shu" => "シュ", "sho" => "ショ",
 139     "cha" => "チャ", "chu" => "チュ", "cho" => "チョ",
 140     "nya" => "ニャ", "nyu" => "ニュ", "nyo" => "ニョ",
 141     "hya" => "ヒャ", "hyu" => "ヒュ", "hyo" => "ヒョ",
 142     "mya" => "ミャ", "myu" => "ミュ", "myo" => "ミョ",
 143     "rya" => "リャ", "ryu" => "リュ", "ryo" => "リョ",
 144     "gya" => "ギャ", "gyu" => "ギュ", "gyo" => "ギョ",
 145     "ja"  => "ジャ", "ju"  => "ジュ", "jo"  => "ジョ",
 146     "jya" => "ジャ", "jyu" => "ジュ", "jyo" => "ジョ",
 147     "dza" => "ヂャ", "dju" => "ヂュ", "dzo" => "ヂョ",
 148     "dja" => "ヂャ",                  "djo" => "ヂョ",
 149     "bya" => "ビャ", "byu" => "ビュ", "byo" => "ビョ",
 150     "pya" => "ピャ", "pyu" => "ピュ", "pyo" => "ピョ",
 151 
 152     "tt"  => "ッ"
 153 );
 154 
 155 my(%comm) = (
 156     "-"   => "ー",
 157     "."   => "。",
 158     ","   => "、",
 159     "!"   => "!",
 160     "?"   => "?",
 161     "~"   => "〜",
 162     "["   => "〔", "]"   => "〕",
 163     "{"   => "【", "}"   => "】",
 164     "("   => "(", ")"   => ")",
 165     "0"   => "0", "1"   => "1", "2"   => "2", "3"   => "3", "4"   => "4",
 166     "5"   => "5", "6"   => "6", "7"   => "7", "8"   => "8", "9"   => "9",
 167     "*"   => "★", # ☆ is uglier :P
 168     # where to put ♪ ?
 169 );
 170 
 171 my(@squot) = ( "「", "」" );
 172 my($squoti) = 0;
 173 my(@dquot) = ( "『", "』" );
 174 my($dquoti) = 0;
 175 
 176 my(%hirab); # Contains DIRECT insert_texts and first-level metas for Hiragana
 177 my(%katab); # Contains DIRECT insert_texts and first-level metas for Katakana
 178 my(%commb); # Common binds
 179 my(%persb); # Persistent binds (don't collide and are all second-level or more)
 180 
 181 my($currs) = "英"; # Current state eigo -> hiragana -> katakana
 182 
 183 # Builds irssi binds from a hash containing romaji -> utf-8 pairs
 184 # Arguments: sh, dh, pr
 185 #   sh:  Source Hash (%hira, %kata, %comm)
 186 #   dh:  Destination Hash (%hirab or %katab)
 187 #   pr:  Prefix for meta keys (hira or kata)
 188 # The function uses %persb for all non-direct binds
 189 sub build_binds ($$$) {
 190     my($sh) = $_[0]; # Source hash, %hira or %kata
 191     my($dh) = $_[1]; # Destination hash, %hirab or %katab
 192     my($pr) = $_[2]; # The prefix
 193     my($k, $v);      # for each from the source hash
 194 
 195     while (($k, $v) = each %{$sh}) {
 196 	my($ll) = length($k); # get the length of the KEY
 197 	my($tk, $tv);         # used to take apart the KEY into chars
 198 
 199 	if ($ll == 1) { # one-char KEYs are easy
 200 	    ${$dh}{$k} = "insert_text $v";
 201 	} elsif ($ll >= 2) {
 202 	    # take the first and the second chars
 203 	    $tk = substr($k, 0, 1);
 204 	    $tv = substr($k, 1, 1);
 205 	    # if the meta-key is not defined yet, define it now
 206 	    if (!${$dh}{$tk}) {
 207 		${$dh}{$tk} = "key $pr$tk";
 208 	    }
 209 	    # if the KEY is 2-char, define it now
 210 	    if ($ll == 2) {
 211 		$persb{"$pr$tk-$tv"} = "insert_text $v";
 212 	    } else {
 213 		# otherwise register a new meta key, if not yet registered
 214 		if (!$persb{"$pr$tk-$tv"}) {
 215 		    $persb{"$pr$tk-$tv"} = "key $pr$tk$tv";
 216 		}
 217 		# and now register the key...
 218 		$tk .= $tv;
 219 		$tv = substr($k, 2, 1);
 220 		$persb{"$pr$tk-$tv"} = "insert_text $v";
 221 	    }
 222 	}
 223     }
 224 }
 225 
 226 # Applies all binds in a given hash
 227 sub do_binds ($) {
 228     my($h) = $_[0];
 229     my($k, $v);
 230 
 231     while (($k, $v) = each %{$h}) {
 232 	Irssi::command("^bind $k $v");
 233     }
 234 }
 235 
 236 # Deletes all binds existing in the given hash
 237 sub del_binds ($) {
 238     my($h) = $_[0];
 239     my($k, $v);
 240 
 241     while (($k, $v) = each %{$h}) {
 242 	Irssi::command("^bind -delete $k");
 243     }
 244 }
 245 
 246 # Bindings for hiragana, next Ctrl-R will bind Katakana
 247 sub cmd_rohira {
 248     Irssi::command("^bind ^R /rokata");
 249     do_binds \%hirab;
 250     do_binds \%commb;
 251     $currs = "å¹³";
 252     Irssi::statusbar_items_redraw('ro1_sb');
 253 }
 254 
 255 # Bindings for Katakana, next Ctrl-R will restore
 256 sub cmd_rokata {
 257     Irssi::command("^bind ^R /rorest");
 258     del_binds \%hirab;
 259     do_binds \%katab;
 260     # no need to rebind commons from %commb
 261     $currs = "片";
 262     Irssi::statusbar_items_redraw('ro1_sb');
 263 }
 264 
 265 # Delete bindings (first-level), next Ctrl-R will bind Hiragana
 266 sub cmd_rorest {
 267     Irssi::command("^bind ^R /rohira");
 268     del_binds \%katab;
 269     del_binds \%commb;
 270     $currs = "英";
 271     Irssi::statusbar_items_redraw('ro1_sb');
 272 }
 273 
 274 # Display the statusbar item
 275 sub ro1_sb_show ($$) {
 276     my ($item, $get_size_only) = @_;
 277 
 278     $item->{min_size} = $item->{max_size} = 2;
 279     $item->default_handler($get_size_only, "{sb " . $currs . "}", 0, 1);
 280 }
 281 
 282 # Register the /commands
 283 Irssi::command_bind('rohira', 'cmd_rohira');
 284 Irssi::command_bind('rokata', 'cmd_rokata');
 285 Irssi::command_bind('rorest', 'cmd_rorest');
 286 
 287 # Register the statusbar item
 288 Irssi::statusbar_item_register('ro1_sb', 0, "ro1_sb_show");
 289 Irssi::statusbar_items_redraw('ro1_sb');
 290 
 291 # Bind Ctrl-R to Hiragana (initial position)
 292 Irssi::command("^bind ^R /rohira");
 293 
 294 # Build the bind hashes
 295 build_binds \%hira, \%hirab, "hira";
 296 build_binds \%kata, \%katab, "kata";
 297 build_binds \%comm, \%commb, "comm";
 298 
 299 # Register persistent binds... SLOWwwwwww :(((
 300 do_binds \%persb;