html/url_log.pl


   1 # url grabber, yes it sucks
   2 #
   3 # infected with the gpl virus
   4 #
   5 # Thomas Graf <tgraf@europe.com>
   6 #
   7 # version: 0.2
   8 #
   9 # Commands:
  10 #
  11 #   /URL LIST
  12 #   /URL CLEAR
  13 #   /URL OPEN [<nr>]
  14 #   /URL QUOTE [<nr>]
  15 #   /URL HEAD [<nr>]            !! Blocking !!
  16 #   /HEAD <url>                 !! Blocking !!
  17 #
  18 # Config Values
  19 #
  20 # [url logfile]
  21 #  url_log                log urls to url_log_file
  22 #  url_log_file           file to save urls
  23 #  url_log_format         format in url logfile
  24 #  url_log_timestamp      format of timestamp in url logfile
  25 #
  26 # [url log in memory]
  27 #  url_log_browser        command to execute to open url, %f will be replaced with the url
  28 #  url_log_size           keep that many urls in the list
  29 #
  30 # [http head stuff]
  31 #  url_head_format        format of HEAD output
  32 #  url_auto_head          do a head on every url received
  33 #  url_auto_head_format   format of auto head output
  34 #
  35 #
  36 # Database installation
  37 # - create database and user
  38 # - create table url ( id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,
  39 #      time INT UNSIGNED, nick VARCHAR(25), target VARCHAR(25), url VARCHAR(255));
  40 #   or similiar :)
  41 #
  42 #
  43 # todo:
  44 #
  45 #  - fix XXX marks
  46 #  - xml output?
  47 #  - don't output "bytes" if content-length is not available
  48 #  - prefix with http:// if no prefix is given
  49 
  50 use Irssi;
  51 use Irssi::Irc;
  52 
  53 $VERSION = "0.2";
  54 %IRSSI = (
  55     authors     => 'Thomas Graf',
  56     contact     => 'irssi@reeler.org',
  57     name        => 'url_log',
  58     description => 'logs urls to textfile or/and database, able to list, quote, open or `http head` saved urls.',
  59     license     => 'GNU GPLv2 or later',
  60     url         => 'http://irssi.reeler.org/url/',
  61 );
  62 
  63 use LWP;
  64 use LWP::UserAgent;
  65 use HTTP::Status;
  66 use DBI;
  67 
  68 use POSIX qw(strftime);
  69 
  70 use strict;
  71 
  72 my @urls;
  73 my $user_agent = new LWP::UserAgent;
  74 
  75 $user_agent->agent("IrssiUrlLog/0.2");
  76 
  77 # hmm... stolen..
  78 # -verbatim- import expand
  79 sub expand {
  80   my ($string, %format) = @_;
  81   my ($exp, $repl);
  82   $string =~ s/%$exp/$repl/g while (($exp, $repl) = each(%format));
  83   return $string;
  84 }
  85 # -verbatim- end
  86 
  87 sub print_msg
  88 {
  89     Irssi::active_win()->print("@_");
  90 }
  91 
  92 #
  93 # open url in brower using url_log_brower command
  94 #
  95 sub open_url
  96 {
  97     my ($data) = @_;
  98 
  99     my ($nick, $target, $url) = split(/ /, $data);
 100 
 101     my $pid = fork();
 102 
 103     if ($pid) {
 104         Irssi::pidwait_add($pid);
 105     } elsif (defined $pid) { # $pid is zero here if defined
 106         my $data = expand(Irssi::settings_get_str("url_log_browser"), "f", $url);
 107         # XXX use exec
 108         system $data;
 109         exit;
 110     } else {
 111         # weird fork error
 112         print_msg "Can't fork: $!";
 113     }
 114 }
 115 
 116 sub head
 117 {
 118     my ($url) = @_;
 119     my $req = new HTTP::Request HEAD => $url;
 120     my $res = $user_agent->request($req);
 121     return $res;
 122 }
 123 
 124 #
 125 # do a HEAD
 126 #
 127 sub do_head
 128 {
 129     my ($url) = @_;
 130 
 131     my $res = head($url);
 132 
 133     if ($res->code ne RC_OK) {
 134         Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, "\n" .
 135             $res->status_line());
 136     } else {
 137 
 138         my $t = expand(Irssi::settings_get_str("url_head_format"),
 139            "u", $url,
 140            "t", scalar $res->content_type,
 141            "l", scalar $res->content_length,
 142            "s", scalar $res->server);
 143 
 144         Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, $t);
 145     }
 146 }
 147 
 148 #
 149 # called if url is detected, should do a HEAD and print a 1-liner
 150 #
 151 sub do_auto_head
 152 {
 153     my ($url, $window) = @_;
 154 
 155     return if ($url !~ /^http:\/\//);
 156 
 157     my $res = head($url);
 158 
 159     if ($res->code ne RC_OK) {
 160         $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $res->status_line());
 161     } else {
 162 
 163         my $t = expand(Irssi::settings_get_str("url_auto_head_format"),
 164            "u", $url,
 165            "c", $res->code,
 166            "t", scalar $res->content_type,
 167            "l", scalar $res->content_length,
 168            "s", scalar $res->server);
 169 
 170         $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $t);
 171     }
 172 }
 173 
 174 #
 175 # log url to file
 176 #
 177 sub log_to_file
 178 {
 179     my ($nick, $target, $text) = @_;
 180     my ($lfile) = glob Irssi::settings_get_str("url_log_file");
 181 
 182     if ( open(LFD, ">> $lfile") ) {
 183 
 184         my %h = {
 185             time => time,
 186             nick => $nick,
 187             target => $target,
 188             url => $text
 189         };
 190 
 191         print LFD expand(Irssi::settings_get_str("url_log_format"),
 192           "s", strftime(Irssi::settings_get_str("url_log_timestamp_format"), localtime),
 193           "n", $nick,
 194           "t", $target,
 195           "u", $text), "\n";
 196 
 197         close LFD;
 198     } else {
 199         print_msg "Warning: Unable to open file $lfile $!";
 200     }
 201 }
 202 
 203 
 204 #
 205 # log url to database
 206 #
 207 sub log_to_database
 208 {
 209     my ($nick, $target, $text) = @_;
 210 
 211     # this is quite expensive, but...
 212     my $dbh = DBI->connect(Irssi::settings_get_str("url_log_db_dsn"),
 213                            Irssi::settings_get_str("url_log_db_user"),
 214                            Irssi::settings_get_str("url_log_db_password"))
 215     or print_msg "Can't connect to database " . $DBI::errstr;
 216 
 217     if ($dbh) {
 218 
 219         my $sql = "INSERT INTO url (time, nick, target, url) VALUES (UNIX_TIMESTAMP()," .
 220           $dbh->quote($nick) . "," . $dbh->quote($target) . "," . $dbh->quote($text) . ")";
 221 
 222         $dbh->do($sql) or print_msg "Can't execute sql command: " . $DBI::errstr;
 223 
 224         $dbh->disconnect();
 225     }
 226 }
 227 
 228 #
 229 # head command handler
 230 #
 231 sub sig_head
 232 {
 233     my ($cmd_line, $server, $win_item) = @_;
 234     my @args = split(' ', $cmd_line);
 235 
 236     my $url;
 237 
 238     if (@args <= 0) {
 239 
 240         if ($#urls eq 0) {
 241             return;
 242         }
 243 
 244         $url = $urls[$#urls];
 245         $url =~ s/^.*?\s.*?\s//;
 246     } else {
 247         $url = lc(shift(@args));
 248     }
 249 
 250     do_head($url);
 251 }
 252 
 253 #
 254 # msg handler
 255 #
 256 sub sig_msg
 257 {
 258     my ($server, $data, $nick, $address) = @_;
 259     my ($target, $text) = split(/ :/, $data, 2);
 260 
 261     # very special, but better than just \w::/* and www.*
 262     while ($text =~ s#.*?(^|\s)(\w+?://.+?|[\w\.]{3,}/[\w~\.]+?(/|/\w+?\.\w+?))(\s|$)(.*)#$5#i) {
 263 
 264         return if ($1 =~ /^\.\./);
 265 
 266         push @urls, "$nick $target $2";
 267 
 268         # XXX resize correctly if delta is > 1
 269         if ($#urls >= Irssi::settings_get_int("url_log_size")) {
 270             shift @urls;
 271         }
 272 
 273         my $ischannel = $server->ischannel($target);
 274         my $level = $ischannel ? MSGLEVEL_PUBLIC : MSGLEVEL_MSGS;
 275         $target = $nick unless $ischannel;
 276         my $window = $server->window_find_closest($target, $level);
 277 
 278         if ( Irssi::settings_get_bool("url_log_auto_head") ) {
 279             do_auto_head($2, $window);
 280         }
 281 
 282         if ( Irssi::settings_get_bool("url_log") ) {
 283             log_to_file($nick, $target, $2);
 284         }
 285 
 286         if ( Irssi::settings_get_bool("url_log_db") ) {
 287             log_to_database($nick, $target, $2);
 288         }
 289     }
 290 }
 291 
 292 sub print_url_list_item
 293 {
 294     my ($n, $data) = @_;
 295     my ($src, $dst, $url) = split(/ /, $data);
 296 
 297     Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_list', $n, $src, $dst, $url);
 298 }
 299 
 300 #
 301 # url command handler
 302 #
 303 sub sig_url
 304 {
 305     my ($cmd_line, $server, $win_item) = @_;
 306     my @args = split(' ', $cmd_line);
 307 
 308     if (@args <= 0) {
 309         print_msg "URL LIST [<nr>]       list all url(s)";
 310         print_msg "    OPEN [<nr>]       open url in browser";
 311         print_msg "    QUOTE [<nr>]      quote url (print to current channel)";
 312         print_msg "    HEAD              send HEAD to server";
 313         print_msg "    CLEAR             clear url list";
 314         return;
 315     }
 316 
 317     my $action = lc(shift(@args));
 318 
 319     if ($action eq "list") {
 320 
 321         if (@args > 0) {
 322             my $i = shift(@args);
 323             print_url_list_item($i, $urls[$i]);
 324         } else {
 325             my $i = 0;
 326             foreach my $l (@urls) {
 327                 print_url_list_item($i, $l);
 328                 $i++;
 329             }
 330         }
 331 
 332     } elsif($action eq "open") {
 333 
 334         my $i = $#urls;
 335         if (@args > 0) {
 336             $i = shift(@args);
 337         }
 338         open_url($urls[$i]);
 339 
 340     } elsif ($action eq "quote") {
 341 
 342         my $i = $#urls;
 343         if (@args > 0) {
 344             $i = shift(@args);
 345         }
 346         Irssi::active_win()->command("SAY URL: " . $urls[$i]);
 347 
 348     } elsif ($action eq "clear") {
 349 
 350         splice @urls;
 351 
 352     } elsif ($action eq "head") {
 353 
 354         my $i = $#urls;
 355         if (@args > 0) {
 356             $i = shift(@args);
 357         }
 358         my $url = $urls[$i];
 359         $url =~ s/^.*?\s.*?\s//;
 360 
 361         do_head($url);
 362 
 363     } else {
 364         print_msg "Unknown action";
 365     }
 366 }
 367 
 368 
 369 Irssi::command_bind('head', 'sig_head');
 370 Irssi::command_bind('url', 'sig_url');
 371 Irssi::signal_add_first('event privmsg', 'sig_msg');
 372 
 373 Irssi::settings_add_bool("url_log", "url_log", 1);
 374 Irssi::settings_add_bool("url_log", "url_log_auto_head", 1);
 375 Irssi::settings_add_bool("url_log", "url_log_db", 0);
 376 Irssi::settings_add_str("url_log", "url_log_db_dsn", 'DBI:mysql:irc_url:localhost');
 377 Irssi::settings_add_str("url_log", "url_log_db_user", 'irc_url');
 378 Irssi::settings_add_str("url_log", "url_log_db_password", 'nada');
 379 Irssi::settings_add_str("url_log", "url_log_file", "~/.irssi/url");
 380 Irssi::settings_add_str("url_log", "url_log_timestamp_format", '%c');
 381 Irssi::settings_add_str("url_log", "url_log_format", '%s %n %t %u');
 382 Irssi::settings_add_str("url_log", "url_log_browser", 'galeon -n -x %f > /dev/null');
 383 Irssi::settings_add_int("url_log", "url_log_size", 25);
 384 Irssi::settings_add_str("url_log", "url_auto_head_format", '%c %t %l bytes');
 385 Irssi::settings_add_str("url_log", "url_head_format", '
 386 Content-Type: %t
 387 Length:       %l bytes
 388 Server:       %s');
 389 
 390 
 391 Irssi::theme_register(['url_head', '[%gHTTP Head%n %g$0%n]$1-',
 392                        'url_auto_head', '[%gHEAD%n] $0-',
 393                        'url_list', '[$0] $1 %W$2%n $3-']);