html/romaji.pl
1 #!/usr/bin/perl -w
2 #
3 # Copyright (c) 2002 Victor Ivanov <v0rbiz@yahoo.com>
4 # All rights reserved.
5 #
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions
8 # are met:
9 # 1. Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 # 2. Redistributions in binary form must reproduce the above copyright
12 # notice, this list of conditions and the following disclaimer in the
13 # documentation and/or other materials provided with the distribution.
14 #
15 # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 # SUCH DAMAGE.
26 #
27
28 use strict;
29 use vars qw($VERSION %IRSSI);
30
31 $VERSION = '1.0b3';
32 %IRSSI = (
33 authors => 'Victor Ivanov',
34 contact => 'v0rbiz@yahoo.com',
35 name => 'romaji',
36 description => 'translates romaji to hiragana or katakana in text enclosed in ^R',
37 license => 'BSD 2-clause',
38 url => 'http://irssi.org/scripts/'
39 );
40
41
# Romaji -> hiragana lookup table.
#
# Keys are lower-case romaji syllables of one to three characters; values are
# the corresponding kana written as raw UTF-8 string literals.  The upper-case
# key "TSU" can never be produced by user input (r2hk lower-cases its input
# before looking anything up); r2hk reads it for the small tsu it emits in
# place of a doubled consonant.
#
# The kana literals were restored from a mis-decoded (UTF-8 read as Latin-1)
# copy of this file.
my(%hira) = (
    # basic syllables
    "a" => "あ", "i" => "い", "u" => "う", "e" => "え", "o" => "お",
    "ka" => "か", "ki" => "き", "ku" => "く", "ke" => "け", "ko" => "こ",
    "sa" => "さ", "shi" => "し", "su" => "す", "se" => "せ", "so" => "そ",
    "ta" => "た", "chi" => "ち", "tsu" => "つ", "te" => "て", "to" => "と",
    "na" => "な", "ni" => "に", "nu" => "ぬ", "ne" => "ね", "no" => "の",
    "ha" => "は", "hi" => "ひ", "hu" => "ふ", "he" => "へ", "ho" => "ほ", "fu" => "ふ",
    "ma" => "ま", "mi" => "み", "mu" => "む", "me" => "め", "mo" => "も",
    "ya" => "や", "yu" => "ゆ", "yo" => "よ",
    "ra" => "ら", "ri" => "り", "ru" => "る", "re" => "れ", "ro" => "ろ",
    "wa" => "わ", "wi" => "ゐ", "we" => "ゑ", "wo" => "を",
    # a lone "n" or "m" both map to the syllabic n
    "n" => "ん",
    "m" => "ん",

    # voiced and half-voiced syllables
    "ga" => "が", "gi" => "ぎ", "gu" => "ぐ", "ge" => "げ", "go" => "ご",
    "za" => "ざ", "ji" => "じ", "zu" => "ず", "ze" => "ぜ", "zo" => "ぞ",
    "da" => "だ", "dzi" => "ぢ", "dzu" => "づ", "de" => "で", "do" => "ど",
    "ba" => "ば", "bi" => "び", "bu" => "ぶ", "be" => "べ", "bo" => "ぼ",
    "pa" => "ぱ", "pi" => "ぴ", "pu" => "ぷ", "pe" => "ぺ", "po" => "ぽ",

    # kana + small vowel combinations
    "fa" => "ふぁ", "fi" => "ふぃ", "fe" => "ふぇ", "fo" => "ふぉ",
    "di" => "でぃ",

    # kana + small ya/yu/yo combinations
    "kya" => "きゃ", "kyu" => "きゅ", "kyo" => "きょ",
    "sha" => "しゃ", "shu" => "しゅ", "sho" => "しょ",
    "cha" => "ちゃ", "chu" => "ちゅ", "cho" => "ちょ",
    "nya" => "にゃ", "nyu" => "にゅ", "nyo" => "にょ",
    "hya" => "ひゃ", "hyu" => "ひゅ", "hyo" => "ひょ",
    "mya" => "みゃ", "myu" => "みゅ", "myo" => "みょ",
    "rya" => "りゃ", "ryu" => "りゅ", "ryo" => "りょ",
    "gya" => "ぎゃ", "gyu" => "ぎゅ", "gyo" => "ぎょ",
    "ja" => "じゃ", "ju" => "じゅ", "jo" => "じょ",
    "jya" => "じゃ", "jyu" => "じゅ", "jyo" => "じょ",
    "dza" => "ぢゃ", "dju" => "ぢゅ", "dzo" => "ぢょ",
    "dja" => "ぢゃ", "djo" => "ぢょ",
    "bya" => "びゃ", "byu" => "びゅ", "byo" => "びょ",
    "pya" => "ぴゃ", "pyu" => "ぴゅ", "pyo" => "ぴょ",

    # small tsu, emitted by r2hk for a doubled consonant
    "TSU" => "っ"
);
82
# Romaji -> katakana lookup table.
#
# Same layout as the hiragana table: lower-case romaji keys of one to three
# characters mapped to raw UTF-8 kana literals, plus the upper-case "TSU"
# entry that r2hk reads for the small tsu of a doubled consonant.  This table
# additionally carries the "v" row, "ye" and "dje"/"dze".
#
# The kana literals were restored from a mis-decoded (UTF-8 read as Latin-1)
# copy of this file.
my(%kata) = (
    # basic syllables
    "a" => "ア", "i" => "イ", "u" => "ウ", "e" => "エ", "o" => "オ",
    "ka" => "カ", "ki" => "キ", "ku" => "ク", "ke" => "ケ", "ko" => "コ",
    "sa" => "サ", "shi" => "シ", "su" => "ス", "se" => "セ", "so" => "ソ",
    "ta" => "タ", "chi" => "チ", "tsu" => "ツ", "te" => "テ", "to" => "ト",
    "na" => "ナ", "ni" => "ニ", "nu" => "ヌ", "ne" => "ネ", "no" => "ノ",
    "ha" => "ハ", "hi" => "ヒ", "hu" => "フ", "he" => "ヘ", "ho" => "ホ", "fu" => "フ",
    "ma" => "マ", "mi" => "ミ", "mu" => "ム", "me" => "メ", "mo" => "モ",
    # NOTE(review): the garbled source cannot distinguish エ from ヨ for "ye";
    # エ is assumed here -- confirm against a pristine copy.
    "ya" => "ヤ", "yu" => "ユ", "yo" => "ヨ", "ye" => "エ",
    "ra" => "ラ", "ri" => "リ", "ru" => "ル", "re" => "レ", "ro" => "ロ",
    "wa" => "ワ", "wi" => "ヰ", "we" => "ヱ", "wo" => "ヲ",
    # a lone "n" or "m" both map to the syllabic n
    "n" => "ン",
    "m" => "ン",

    # voiced and half-voiced syllables
    "ga" => "ガ", "gi" => "ギ", "gu" => "グ", "ge" => "ゲ", "go" => "ゴ",
    "za" => "ザ", "ji" => "ジ", "zu" => "ズ", "ze" => "ゼ", "zo" => "ゾ",
    "da" => "ダ", "dzi" => "ヂ", "dzu" => "ヅ", "de" => "デ", "do" => "ド",
    "ba" => "バ", "bi" => "ビ", "bu" => "ブ", "be" => "ベ", "bo" => "ボ",
    "pa" => "パ", "pi" => "ピ", "pu" => "プ", "pe" => "ペ", "po" => "ポ",

    # kana + small vowel combinations
    "va" => "ヴァ", "vi" => "ヴィ", "vu" => "ヴ", "ve" => "ヴェ", "vo" => "ヴォ",
    "fa" => "ファ", "fi" => "フィ", "fe" => "フェ", "fo" => "フォ",
    "di" => "ディ",

    "dje" => "ヂェ", "dze" => "ヂェ",

    # kana + small ya/yu/yo combinations
    "kya" => "キャ", "kyu" => "キュ", "kyo" => "キョ",
    "sha" => "シャ", "shu" => "シュ", "sho" => "ショ",
    "cha" => "チャ", "chu" => "チュ", "cho" => "チョ",
    "nya" => "ニャ", "nyu" => "ニュ", "nyo" => "ニョ",
    "hya" => "ヒャ", "hyu" => "ヒュ", "hyo" => "ヒョ",
    "mya" => "ミャ", "myu" => "ミュ", "myo" => "ミョ",
    "rya" => "リャ", "ryu" => "リュ", "ryo" => "リョ",
    "gya" => "ギャ", "gyu" => "ギュ", "gyo" => "ギョ",
    "ja" => "ジャ", "ju" => "ジュ", "jo" => "ジョ",
    "jya" => "ジャ", "jyu" => "ジュ", "jyo" => "ジョ",
    "dza" => "ヂャ", "dju" => "ヂュ", "dzo" => "ヂョ",
    "dja" => "ヂャ", "djo" => "ヂョ",
    "bya" => "ビャ", "byu" => "ビュ", "byo" => "ビョ",
    "pya" => "ピャ", "pyu" => "ピュ", "pyo" => "ピョ",

    # small tsu, emitted by r2hk for a doubled consonant
    "TSU" => "ッ"
);
126
# Punctuation, digits and symbols shared by both kana tables: each ASCII key
# maps to its full-width / Japanese counterpart (raw UTF-8 literals).  The
# entries are copied into %hira and %kata when the script is loaded.
#
# The literals were restored from a mis-decoded (UTF-8 read as Latin-1) copy
# of this file.
my(%comn) = (
    "-" => "ー",
    "." => "。",
    "," => "、",
    "!" => "！",
    "?" => "？",
    "~" => "〜",
    " " => "　",
    # NOTE(review): the exact bracket glyphs were unrecoverable from the
    # garbled source; corner brackets are assumed -- confirm.
    "[" => "「", "]" => "」",
    "{" => "『", "}" => "』",
    "(" => "（", ")" => "）",
    "0" => "０", "1" => "１", "2" => "２", "3" => "３", "4" => "４",
    "5" => "５", "6" => "６", "7" => "７", "8" => "８", "9" => "９",
    "*" => "★", # ☆ is uglier :P
    # where to put ♪ ?
);
143
# Japanese replacements for ASCII quotes.  Each array holds the opening and
# the closing glyph; the matching index (0 = opening, 1 = closing) is flipped
# by r2hk every time it meets that quote character, and both indices are
# reset to 0 by event1 at the start of every input line.
#
# NOTE(review): the glyphs were restored from a mis-decoded copy of this file
# in which opening and closing forms were indistinguishable -- confirm.
my(@squot) = ( "「", "」" );    # used for '
my($squoti) = 0;
my(@dquot) = ( "『", "』" );    # used for "
my($dquoti) = 0;
148
# r2hk(TEXT, TABLE) -- convert romaji TEXT to kana using the lookup TABLE.
#
#   TEXT   string to convert; it is lower-cased before any lookup
#   TABLE  hash reference mapping romaji syllables (1-3 characters) to their
#          replacement, plus the key "TSU" holding the small-tsu glyph
#
# Returns the converted string.
#
# At every position the longest key (3, then 2, then 1 characters) found in
# TABLE wins.  A character with no mapping is held back in $last for one
# step: if the next syllable is convertible and starts with that very
# character (a doubled consonant such as the first "k" of "kka"), the
# table's "TSU" entry is emitted instead of the character; otherwise the
# character is copied through unchanged.
#
# ' and " that have no mapping in TABLE are replaced by the opening/closing
# glyphs in @squot / @dquot, alternating through the file-level indices
# $squoti / $dquoti (so this sub has side effects on those two variables).
#
# The ($$) prototype is kept only so existing call sites parse as before.
sub r2hk ($$) {
    my ($text, $href) = @_;

    $text = "" unless defined $text;

    my $inp   = lc $text;
    my $inlen = length $inp;    # measured on the string actually scanned
    my $str   = "";
    my $last  = "";             # unconverted character awaiting its successor
    my $pos   = 0;

    while ($pos < $inlen) {
        my ($p, $h);

        # Longest match first.  Near the end of the input substr() simply
        # returns a shorter piece, so $p always holds what was looked up.
        foreach my $try (3, 2, 1) {
            $p = substr($inp, $pos, $try);
            $h = $href->{$p};
            last if defined $h;
        }

        if (!defined $h) {
            # $p is a single character here
            if ($p eq "'") {
                $h = $squot[$squoti];
                $squoti = 1 - $squoti;
            }
            elsif ($p eq "\"") {
                $h = $dquot[$dquoti];
                $dquoti = 1 - $dquoti;
            }
            else {
                $h = $p;        # no mapping: pass through
            }
        }

        if ($h ne $p) {
            # Converted piece: settle the pending character first.  Test by
            # length, not truth, so that a pending "0" is handled as well.
            if (length $last) {
                my $tsu = $href->{"TSU"};
                if ($last eq substr($p, 0, 1) && defined $tsu) {
                    $str .= $tsu;       # doubled consonant
                }
                else {
                    $str .= $last;
                }
                $last = "";
            }
            $str .= $h;
        }
        else {
            # Unconverted piece: flush the previous one and hold this one.
            $str .= $last;
            $last = $p;
        }

        $pos += length $p;
    }

    return $str . $last;
}
212
# Re-entrancy guard: event1 re-emits 'send command' itself, and must ignore
# the signal while its own emission is being delivered.
my($lock_ev) = 0;

# Handler for irssi's 'send command' signal.
#
#   $line    the text the user entered
#   $server  server object, passed through unchanged
#   $witem   window item; the handler does nothing unless this is a reference
#
# The line is split on Ctrl-R (0x12).  Segments alternate between plain text
# (copied as-is) and marked text (run through r2hk).  An empty segment inside
# a marked region means the opening Ctrl-R was doubled, which selects the
# katakana table instead of the hiragana one.  The rewritten line is
# re-emitted as a new 'send command' signal and the original signal is
# stopped.
sub event1 {
    my ($line, $server, $witem) = @_;

    return unless ref $witem;
    return if $lock_ev;

    # Nothing marked for conversion: let the original signal run its course
    # instead of stopping it and re-emitting an identical line.
    return if !defined $line || index($line, "\x12") < 0;

    # every line starts with opening quotes
    $squoti = 0;
    $dquoti = 0;

    my $str    = "";
    my $inside = 0;     # true while the next segment is marked text
    my $empty  = 0;     # empty segments seen since the region was opened

    # The delimiter is written as an escape so it survives editors and
    # transports that drop raw control characters.
    foreach my $part (split(/\x12/, $line)) {
        if (!$inside) {
            $str .= $part;
            $inside = 1;
            next;
        }

        # Compare against the empty string, not truth: a marked "0" is text.
        if ($part eq "") {
            $empty++;
            next;
        }

        $str .= r2hk($part, $empty ? \%kata : \%hira);
        $empty  = 0;
        $inside = 0;
    }

    $lock_ev = 1;
    Irssi::signal_emit('send command', $str, $server, $witem);
    Irssi::signal_stop();
    $lock_ev = 0;
}
253
# /romaji command: print a short description of the script and how to use it.
#
# The kana and the Ctrl-R (0x12) characters in the examples were restored
# from a copy of this file in which the kana were mis-decoded and the control
# characters had been dropped.
# NOTE(review): the exact placement of the Ctrl-R characters is inferred from
# the surrounding wording -- confirm against a pristine copy.
sub cmd_romaji {
    Irssi::print('%BRomaji (with ひらがな and カタカナ support) version '.$VERSION);
    Irssi::print('(this is amateur product and comes with %Wno warranty%n, see the source)');
    # double-quoted so that \x12 yields a real Ctrl-R
    Irssi::print("Text enclosed in Ctrl-Rs (like \x12this\x12) will be converted to hiragana.");
    Irssi::print('If the opening ^R is doubled, it will be converted to katakana.');
    Irssi::print("Example: \x12genki\x12 -> げんき and \x12\x12genki\x12 -> ゲンキ");
}
261
# Hook into irssi: rewrite outgoing lines and provide the /romaji help command.
Irssi::signal_add('send command', "event1");
Irssi::command_bind('romaji', \&cmd_romaji);

# Announce that the script has been loaded.
Irssi::print("%B$IRSSI{name} $VERSION%n loaded; type /romaji for more info");

# Copy the shared punctuation/digit entries into both kana tables.
@hira{ keys %comn } = values %comn;
@kata{ keys %comn } = values %comn;