# html/irccomplete.pl


# TAB complete words from dictionary
# for irssi 0.7.99 by Timo Sirainen
# Greatly modified by Erkki Seppälä to build a dictionary of said words

   5 use Irssi;
   6 
   7 use vars qw($VERSION %IRSSI);
   8 $VERSION = "0.1";
   9 %IRSSI = (
  10     authors     => "Erkki Seppälä",
  11     contact     => "flux\@inside.org",
  12     name        => "IRC Completion",
  13     description => "Adds words from IRC to your tab-completion list, plus fixes typos",
  14     license     => "Public Domain",
  15     url         => "http://xulfad.inside.org/~flux/software/irssi/",
  16     changed     => "Thu Feb  7 22:45:55 EET 2002"
  17 );
  18 
  19 
  20 my @wordHistory;
  21 my %words;
  22 my %permanent;
  23 
  24 my $wordChars = join("", ('a'..'z', '0'..'9', 'öä'));
  25 my $maxWords = 5000;
  26 my $minWordLength = 4;
  27 my $maxWordLength = 80;
  28 my $maxTypoLength = 10;
  29 my $permanentThreshold = 1;
  30 
  31 my %typoWords;
  32 my $correctWordCounter = 1;
  33 my %correctWordsByIndex;
  34 my %correctWordsByWord;
  35 
  36 # by word
  37 sub addCorrectWord {
  38   my $index = $correctWordsByWord{$_[0]} or 0;
  39   if ($index > 0) {
  40     ++$correctWordsByIndex{$index}->[1];
  41     return $index;
  42   } else {
  43     $correctWordsByIndex{$correctWordCounter} = [$_[0], 1];
  44     $correctWordsByWord{$_[0]} = $correctWordCounter;
  45     ++$correctWordCounter;
  46     return $correctWordCounter - 1;
  47   }
  48 };
  49 
  50 # by word
  51 sub delCorrectWord {
  52   my ($word) = @_;
  53   my $index = $correctWordsByWord{$word};
  54   if (--$correctWordsByIndex{$index}->[1] == 0) {
  55     delete $correctWordsByWord{$correctWordsByIndex{$index}->[0]};
  56     delete $correctWordsByIndex{$index};
  57   }
  58 }
  59 
  60 sub sig_complete {
  61   my ($complist, $window, $word, $linestart, $want_space) = @_;
  62 
  63   $word =~ s/([^a-zA-Z0-9])/\\\1/g;
  64 
  65   @$complist = reverse (@$complist, grep(/^$word/, (keys %permanent, keys %words)));
  66 
  67   if (exists $typoWords{$word}) {
  68     my $correctWord = $correctWordsByIndex{$typoWords{$word}->[0]}->[0];
  69     @$complist = (@complist, $correctWord);
  70   }
  71 
  72   my %m = map { ($_ => $n++); } @$complist; 
  73   @$complist = ();
  74   foreach my $key (sort keys %m) { 
  75     $m2{$m{$key}}=$key; 
  76   } 
  77   foreach my $key (reverse sort keys %m2) { 
  78     push @$complist, $m2{$key};
  79   }
  80 }
  81 
  82 # $word, $removes
  83 sub generate_drops {
  84   my ($word, $changes) = @_;
  85   my @list;
  86   for (my $c = 0; $c < length($word) - 1; ++$c) {
  87     my $misWord = substr($word, 0, $c) . substr($word, $c + 1);
  88     if ($changes > 1) {
  89       push @list, generate_drops($misWord, $changes - 1);
  90     } else {
  91       push @list, $misWord;
  92     }
  93   }
  94   return @list;
  95 }
  96 
  97 sub generate_translations {
  98   my ($word, $changes) = @_;
  99   my @list;
 100   for (my $c = 1; $c < length($word); ++$c) {
 101     my $misWord = substr($word, 0, $c - 1) . substr($word, $c, 1) . substr($word, $c - 1, 1) . substr($word, $c + 1);
 102     if ($changes > 1) {
 103       push @list, generate_drops($misWord, $changes - 1);
 104     } else {
 105       push @list, $misWord;
 106     }
 107   }
 108   return @list;
 109 }
 110 
 111 # $word
 112 sub generate_typos {
 113   my $maxTypoLength = Irssi::settings_get_int('irccomplete_maximum_typo_length');
 114   my ($word) = @_;
 115 
 116   if (length($word) > $maxTypoLength) {
 117     return ();
 118   } else {
 119     return (generate_drops($word, 1), generate_translations($word));
 120   }
 121 }
 122 
 123 sub sig_message {
 124   my ($server, $message) = @_;
 125   my $maxWords = Irssi::settings_get_int('irccomplete_words');
 126   my $minWordLength = Irssi::settings_get_int('irccomplete_minimum_length');
 127   my $maxWordLength = Irssi::settings_get_int('irccomplete_maximum_length');
 128   my $wordChars = Irssi::settings_get_str("irccomplete_word_characters");
 129   my $permanentThreshold = Irssi::settings_get_int('irccomplete_permanent_percent');
 130   foreach my $word (split(/[^$wordChars]/, $message)) {
 131     if (length($word) >= $minWordLength && length($word) <= $maxWordLength) {
 132       if (++$words{$word} > $permanentThreshold / 100.0 * $maxWords) {
 133 	if (++$permanent{$word} == 1) {
 134 	  #Irssi::printformat(MSGLEVEL_CLIENTNOTICE, 'irccomplete_permanent', $word);
 135 	  Irssi::print "Added $word to the list of permanent words";
 136 	}
 137       }
 138       push @wordHistory, $word;
 139       my $wordIndex = addCorrectWord($word);
 140       foreach my $misword (generate_typos($word, 1)) {
 141 	if (!exists $typoWords{$misword}) {
 142 	  $typoWords{$misword} = [$wordIndex, 1];
 143 	} else {
 144 	  ++$typoWords{$misword}->[1];
 145 	}
 146       }
 147       while (@wordHistory > $maxWords) {
 148 	my $word = shift @wordHistory;
 149 	if (--$words{$word} == 0) {
 150 	  delete $words{$word};
 151 	}
 152 	foreach my $misword (generate_typos($word, 1)) {
 153 	  if (--$typoWords{$misword}->[1] == 0) {
 154 	    delete $typoWords{$misword};
 155 	  }
 156 	}
 157 	delCorrectWord($word);
 158       }
 159     }
 160   }
 161 
 162 
 163   return 1;
 164 }
 165 
 166 sub cmd_typowords {
 167   Irssi::print (scalar(@wordHistory) . " words, " . 
 168 		scalar(keys %typoWords) . " typowords, " .
 169 		scalar(keys %correctWordsByWord) . "x" . scalar(keys %correctWordsByIndex) . " correct words");
 170   my $line = "";
 171 
 172   foreach my $word (keys %typoWords) {
 173     $line .= $word . "|" . $typoWords{$word}->[0] . " ";
 174   }
 175   Irssi::print "$line";
 176   $line = "";
 177 
 178   foreach my $index (keys %correctWordsByIndex) {
 179     $line .= $index . ":[" . join("|", @{$correctWordsByIndex{$index}}) . "] ";
 180   }
 181   Irssi::print "$line";
 182   $line = "";
 183   
 184   foreach my $word (keys %correctWordsByWord) {
 185     $line .= $word . ":" . $correctWordsByWord{$word} . " ";
 186   }
 187   Irssi::print "$line";
 188   $line = "";
 189   
 190   return 1;
 191 };
 192 
 193 Irssi::theme_register(['irccomplete_permanent', 'Added $1 to the list of permanent words']);
 194 
 195 Irssi::settings_add_str("misc", "irccomplete_word_characters", $wordChars);
 196 Irssi::settings_add_int("misc", "irccomplete_words", $maxWords);
 197 Irssi::settings_add_int("misc", "irccomplete_minimum_length", $minWordLength);
 198 Irssi::settings_add_int("misc", "irccomplete_maximum_length", $maxWordLength);
 199 Irssi::settings_add_int("misc", "irccomplete_maximum_typo_length", $maxTypoLength);
 200 Irssi::settings_add_int("misc", "irccomplete_permanent_percent", $permanentThreshold);
 201 
 202 foreach my $sig ("message public", "message private", 
 203 		 "message own_public", "message own_private", 
 204 		 "message topic") {
 205 #foreach my $sig ("message own_public", "message own_private") {
 206   Irssi::signal_add($sig, "sig_message");
 207 }
 208 Irssi::signal_add_last('complete word', 'sig_complete');
 209 
 210 Irssi::command_bind("irccomplete_typowords", "cmd_typowords");