html/romaji.pl


   1 #!/usr/bin/perl -w
   2 #
   3 # Copyright (c) 2002 Victor Ivanov <v0rbiz@yahoo.com>
   4 # All rights reserved.
   5 #
   6 # Redistribution and use in source and binary forms, with or without
   7 # modification, are permitted provided that the following conditions
   8 # are met:
   9 # 1. Redistributions of source code must retain the above copyright
  10 #    notice, this list of conditions and the following disclaimer.
  11 # 2. Redistributions in binary form must reproduce the above copyright
  12 #    notice, this list of conditions and the following disclaimer in the
  13 #    documentation and/or other materials provided with the distribution.
  14 #
  15 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18 # ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25 # SUCH DAMAGE.
  26 #
  27 
  28 use strict;
  29 use vars qw($VERSION %IRSSI);
  30 
  31 $VERSION = '1.0b3';
  32 %IRSSI = (
  33     authors	=> 'Victor Ivanov',
  34     contact	=> 'v0rbiz@yahoo.com',
  35     name	=> 'romaji',
  36     description => 'translates romaji to hiragana or katakana in text enclosed in ^R',
  37     license	=> 'BSD 2-clause',
  38     url		=> 'http://irssi.org/scripts/'
  39 );
  40 
  41 
  42 my(%hira) = (
  43     "a"   => "あ", "i"   => "い", "u"   => "う", "e"   => "え", "o"   => "お",
  44     "ka"  => "か", "ki"  => "き", "ku"  => "く", "ke"  => "け", "ko"  => "こ",
  45     "sa"  => "さ", "shi" => "し", "su"  => "す", "se"  => "せ", "so"  => "そ",
  46     "ta"  => "た", "chi" => "ち", "tsu" => "つ", "te"  => "て", "to"  => "と",
  47     "na"  => "な", "ni"  => "に", "nu"  => "ぬ", "ne"  => "ね", "no"  => "の",
  48     "ha"  => "は", "hi"  => "ひ", "hu"  => "ふ", "he"  => "へ", "ho"  => "ほ", "fu"  => "ふ",
  49     "ma"  => "ま", "mi"  => "み", "mu"  => "む", "me"  => "め", "mo"  => "も",
  50     "ya"  => "や", "yu"  => "ゆ", "yo"  => "よ",
  51     "ra"  => "ら", "ri"  => "り", "ru"  => "る", "re"  => "れ", "ro"  => "ろ",
  52     "wa"  => "わ", "wi"  => "ゐ", "we"  => "ゑ", "wo"  => "を",
  53     "n"   => "ã‚“",
  54     "m"   => "ã‚“",
  55 
  56     "ga"  => "が", "gi"  => "ぎ", "gu"  => "ぐ", "ge"  => "げ", "go"  => "ご",
  57     "za"  => "ざ", "ji"  => "じ", "zu"  => "ず", "ze"  => "ぜ", "zo"  => "ぞ",
  58     "da"  => "だ", "dzi" => "ぢ", "dzu" => "づ", "de"  => "で", "do"  => "ど",
  59     "ba"  => "ば", "bi"  => "び", "bu"  => "ぶ", "be"  => "べ", "bo"  => "ぼ",
  60     "pa"  => "ぱ", "pi"  => "ぴ", "pu"  => "ぷ", "pe"  => "ぺ", "po"  => "ぽ",
  61 
  62     "fa"  => "ふぁ", "fi"  => "ふぃ", "fe"  => "ふぇ", "fo"  => "ふぉ",
  63     "di"  => "でぃ",
  64 
  65     "kya" => "きゃ", "kyu" => "きゅ", "kyo" => "きょ",
  66     "sha" => "しゃ", "shu" => "しゅ", "sho" => "しょ",
  67     "cha" => "ちゃ", "chu" => "ちゅ", "cho" => "ちょ",
  68     "nya" => "にゃ", "nyu" => "にゅ", "nyo" => "にょ",
  69     "hya" => "ひゃ", "hyu" => "ひゅ", "hyo" => "ひょ",
  70     "mya" => "みゃ", "myu" => "みゅ", "myo" => "みょ",
  71     "rya" => "りゃ", "ryu" => "りゅ", "ryo" => "りょ",
  72     "gya" => "ぎゃ", "gyu" => "ぎゅ", "gyo" => "ぎょ",
  73     "ja"  => "じゃ", "ju"  => "じゅ", "jo"  => "じょ",
  74     "jya" => "じゃ", "jyu" => "じゅ", "jyo" => "じょ",
  75     "dza" => "ぢゃ", "dju" => "ぢゅ", "dzo" => "ぢょ",
  76     "dja" => "ぢゃ",                  "djo" => "ぢょ",
  77     "bya" => "びゃ", "byu" => "びゅ", "byo" => "びょ",
  78     "pya" => "ぴゃ", "pyu" => "ぴゅ", "pyo" => "ぴょ",
  79 
  80     "TSU" => "っ"
  81 );
  82 
  83 my(%kata) = (
  84     "a"   => "ア", "i"   => "イ", "u"   => "ウ", "e"   => "エ", "o"   => "オ",
  85     "ka"  => "カ", "ki"  => "キ", "ku"  => "ク", "ke"  => "ケ", "ko"  => "コ",
  86     "sa"  => "サ", "shi" => "シ", "su"  => "ス", "se"  => "セ", "so"  => "ソ",
  87     "ta"  => "タ", "chi" => "チ", "tsu" => "ツ", "te"  => "テ", "to"  => "ト",
  88     "na"  => "ナ", "ni"  => "ニ", "nu"  => "ヌ", "ne"  => "ネ", "no"  => "ノ",
  89     "ha"  => "ハ", "hi"  => "ヒ", "hu"  => "フ", "he"  => "ヘ", "ho"  => "ホ", "fu"  => "フ",
  90     "ma"  => "マ", "mi"  => "ミ", "mu"  => "ム", "me"  => "メ", "mo"  => "モ",
  91     "ya"  => "ヤ", "yu"  => "ユ", "yo"  => "ヨ", "ye"  => "エ",
  92     "ra"  => "ラ", "ri"  => "リ", "ru"  => "ル", "re"  => "レ", "ro"  => "ロ",
  93     "wa"  => "ワ", "wi"  => "ヰ", "we"  => "ヱ", "wo"  => "ヲ",
  94     "n"   => "ン",
  95     "m"   => "ン",
  96 
  97     "ga"  => "ガ", "gi"  => "ギ", "gu"  => "グ", "ge"  => "ゲ", "go"  => "ゴ",
  98     "za"  => "ザ", "ji"  => "ジ", "zu"  => "ズ", "ze"  => "ゼ", "zo"  => "ゾ",
  99     "da"  => "ダ", "dzi" => "ヂ", "dzu" => "ヅ", "de"  => "デ", "do"  => "ド",
 100     "ba"  => "バ", "bi"  => "ビ", "bu"  => "ブ", "be"  => "ベ", "bo"  => "ボ",
 101     "pa"  => "パ", "pi"  => "ピ", "pu"  => "プ", "pe"  => "ペ", "po"  => "ポ",
 102 
 103     "va"  => "ヴァ", "vi"  => "ヴィ", "vu"  => "ヴ",   "ve"  => "ヴェ", "vo"  => "ヴォ",
 104     "fa"  => "ファ", "fi"  => "フィ", "fe"  => "フェ", "fo"  => "フォ",
 105     "di"  => "ディ",
 106 
 107     "dje" => "ヂェ", "dze" => "ヂェ",
 108 
 109     "kya" => "キャ", "kyu" => "キュ", "kyo" => "キョ",
 110     "sha" => "シャ", "shu" => "シュ", "sho" => "ショ",
 111     "cha" => "チャ", "chu" => "チュ", "cho" => "チョ",
 112     "nya" => "ニャ", "nyu" => "ニュ", "nyo" => "ニョ",
 113     "hya" => "ヒャ", "hyu" => "ヒュ", "hyo" => "ヒョ",
 114     "mya" => "ミャ", "myu" => "ミュ", "myo" => "ミョ",
 115     "rya" => "リャ", "ryu" => "リュ", "ryo" => "リョ",
 116     "gya" => "ギャ", "gyu" => "ギュ", "gyo" => "ギョ",
 117     "ja"  => "ジャ", "ju"  => "ジュ", "jo"  => "ジョ",
 118     "jya" => "ジャ", "jyu" => "ジュ", "jyo" => "ジョ",
 119     "dza" => "ヂャ", "dju" => "ヂュ", "dzo" => "ヂョ",
 120     "dja" => "ヂャ",                  "djo" => "ヂョ",
 121     "bya" => "ビャ", "byu" => "ビュ", "byo" => "ビョ",
 122     "pya" => "ピャ", "pyu" => "ピュ", "pyo" => "ピョ",
 123 
 124     "TSU" => "ッ"
 125 );
 126 
 127 my(%comn) = (
 128     "-"   => "ー",
 129     "."   => "。",
 130     ","   => "、",
 131     "!"   => "!",
 132     "?"   => "?",
 133     "~"   => "〜",
 134     "  "  => " ",
 135     "["   => "〔", "]"   => "〕",
 136     "{"   => "【", "}"   => "】",
 137     "("   => "(", ")"   => ")",
 138     "0"   => "0", "1"   => "1", "2"   => "2", "3"   => "3", "4"   => "4",
 139     "5"   => "5", "6"   => "6", "7"   => "7", "8"   => "8", "9"   => "9",
 140     "*"   => "★", # ☆ is uglier :P
 141     # where to put ♪ ?
 142 );
 143 
 144 my(@squot) = ( "「", "」" );
 145 my($squoti) = 0;
 146 my(@dquot) = ( "『", "』" );
 147 my($dquoti) = 0;
 148 
 149 sub r2hk ($$) {
 150     my($str) = "";
 151     my($pos) = 0;
 152     my($inlen) = length($_[0]);
 153     my($last) = "";
 154     my($href) = $_[1];
 155     my($inp) = lc($_[0]);
 156 
 157     while ($pos < $inlen) {
 158 	my($len);
 159 	my($p) = substr($inp, $pos, 3);
 160 	my($h) = ${$href}{$p};
 161 
 162 	# this could be done with another cycle, but this way's faster i guess
 163 	if ($h) {
 164 	    $len = 3;
 165 	} else {
 166 	    $p = substr($inp, $pos, 2);
 167 	    $h = ${$href}{$p};
 168 	    if ($h) {
 169 		$len = 2;
 170 	    } else {
 171 		$p = substr($inp, $pos, 1);
 172 		$h = ${$href}{$p};
 173 		if (!$h) {
 174 		    if ($p eq "'") {
 175 			$h = $squot[$squoti];
 176 			$squoti = 1 - $squoti;
 177 		    } elsif ($p eq "\"") {
 178 			$h = $dquot[$dquoti];
 179 			$dquoti = 1 - $dquoti;
 180 		    } else {
 181 			$h = $p;
 182 		    }
 183 		}
 184 		$len = 1;
 185 	    }
 186 	}
 187 
 188 	if ($h ne $p) {
 189 	    if ($last) {
 190 		if ($last eq substr($p, 0, 1)) {
 191 		    $str .= ${$href}{"TSU"};
 192 		} else {
 193 		    $str .= $last;
 194 		}
 195 		$last = "";
 196 	    }
 197 	} else {
 198 	    $str .= $last;
 199 	    $last = $p;
 200 	    $h = "";
 201 	}
 202 
 203 	$str .= $h;
 204 	
 205 	$pos += $len;
 206     }
 207 
 208     $str .= $last;
 209 
 210     return $str;
 211 }
 212 
 213 my($lock_ev) = 0;
 214 
 215 sub event1 {
 216     my ($line, $server, $witem) = @_;
 217 
 218     return unless ref $witem;
 219     if ($lock_ev) { return };
 220     $squoti = 0;
 221     $dquoti = 0;
 222 
 223     my ($str) = "";
 224     my (@p) = split(//, $line);
 225     my ($i);
 226     my ($inside) = 0;
 227     my ($empty) = 0;
 228 
 229     for ($i = 0; $i <= $#p; $i++) {
 230 	if ($inside) {
 231 	    if (!$p[$i]) {
 232 		$empty++;
 233 	    } else {
 234 		if ($empty == 0) {
 235 		    $str .= r2hk($p[$i], \%hira);
 236 		} else {
 237 		    $str .= r2hk($p[$i], \%kata);
 238 		}
 239 		$empty = 0;
 240 		$inside = 0;
 241 	    }
 242 	} else {
 243 	    $str .= $p[$i];
 244 	    $inside = 1;
 245 	}
 246     }
 247 
 248     $lock_ev = 1;
 249     Irssi::signal_emit('send command', $str, $server, $witem);
 250     Irssi::signal_stop();
 251     $lock_ev = 0;
 252 }
 253 
 254 sub cmd_romaji {
 255     Irssi::print('%BRomaji (with ひらがな and カタカナ support) version '.$VERSION);
 256     Irssi::print('(this is amateur product and comes with %Wno warranty%n, see the source)');
 257     Irssi::print('Text enclosed in Ctrl-Rs (like this) will be converted to hiragana.');
 258     Irssi::print('If the opening ^R is doubled, it will be converted to katakana.');
 259     Irssi::print('Example: genki -> げんき and genki -> ゲンキ');
 260 }
 261 
 262 Irssi::signal_add('send command', "event1");
 263 Irssi::command_bind('romaji', \&cmd_romaji);
 264 
 265 Irssi::print('%B'.$IRSSI{name}.' '.$VERSION.'%n loaded; type /romaji for more info');
 266 
 267 # Add the common hash to hiragana and kitakana hashes
 268 my($k, $v);
 269 
 270 while (($k, $v) = each %comn) {
 271     $hira{$k} = $v;
 272     $kata{$k} = $v;
 273 }