# url grabber, yes it sucks
#
# infected with the gpl virus
#
# Thomas Graf <tgraf@europe.com>
#
# version: 0.2
#
# Commands:
#
#   /URL LIST
#   /URL CLEAR
#   /URL OPEN [<nr>]
#   /URL QUOTE [<nr>]
#   /URL HEAD [<nr>]            !! Blocking !!
#   /HEAD <url>                 !! Blocking !!
#
# Config Values
#
# [url logfile]
#  url_log                log urls to url_log_file
#  url_log_file           file to save urls
#  url_log_format         format in url logfile
#  url_log_timestamp      format of timestamp in url logfile
#
# [url log in memory]
#  url_log_browser        command to execute to open url, %f will be replaced with the url
#  url_log_size           keep that many urls in the list
#
# [http head stuff]
#  url_head_format        format of HEAD output
#  url_auto_head          do a head on every url received
#  url_auto_head_format   format of auto head output
#
#
# Database installation
# - create database and user
# - create table url ( id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,
#      time INT UNSIGNED, nick VARCHAR(25), target VARCHAR(25), url VARCHAR(255));
#   or similiar :)
#
#
# todo:
#
#  - fix XXX marks
#  - xml output?
#  - don't output "bytes" if content-length is not available
#  - prefix with http:// if no prefix is given

use strict;
use Irssi;
use Irssi::Irc;
use vars qw($VERSION %IRSSI);

$VERSION = "0.3";
%IRSSI = (
    authors     => 'Thomas Graf',
    contact     => 'irssi@reeler.org',
    name        => 'url_log',
    description => 'logs urls to textfile or/and database, able to list, quote, open or `http head` saved urls.',
    license     => 'GNU GPLv2 or later',
    url         => 'http://irssi.reeler.org/url/',
);

use LWP;
use LWP::UserAgent;
use HTTP::Status;
use DBI;

use POSIX qw(strftime);

my @urls;
my $user_agent = new LWP::UserAgent;

$user_agent->agent("IrssiUrlLog/0.3");

# hmm... stolen..
# -verbatim- import expand
sub expand {
  my ($string, %format) = @_;
  my ($exp, $repl);
  $string =~ s/%$exp/$repl/g while (($exp, $repl) = each(%format));
  return $string;
}
# -verbatim- end

sub print_msg
{
    Irssi::active_win()->print("@_");
}

#
# open url in brower using url_log_brower command
#
sub open_url
{
    my ($data) = @_;

    my ($nick, $target, $url) = split(/ /, $data);

    my $pid = fork();

    if ($pid) {
        Irssi::pidwait_add($pid);
    } elsif (defined $pid) { # $pid is zero here if defined
        my $data = expand(Irssi::settings_get_str("url_log_browser"), "f", $url);
        # XXX use exec
        system $data;
        POSIX::_exit(1);
    } else {
        # weird fork error
        print_msg "Can't fork: $!";
    }
}

sub head
{
    my ($url) = @_;
    my $req = new HTTP::Request HEAD => $url;
    my $res = $user_agent->request($req);
    return $res;
}

#
# do a HEAD
#
sub do_head
{
    my ($url) = @_;

    my $res = head($url);

    if ($res->code ne RC_OK) {
        Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, "\n" .
            $res->status_line());
    } else {

        my $t = expand(Irssi::settings_get_str("url_head_format"),
           "u", $url,
           "t", scalar $res->content_type,
           "l", scalar $res->content_length,
           "s", scalar $res->server);

        Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, $t);
    }
}

#
# called if url is detected, should do a HEAD and print a 1-liner
#
sub do_auto_head
{
    my ($url, $window) = @_;

    return if ($url !~ /^http:\/\//);

    my $res = head($url);

    if ($res->code ne RC_OK) {
        $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $res->status_line());
    } else {

        my $t = expand(Irssi::settings_get_str("url_auto_head_format"),
           "u", $url,
           "c", $res->code,
           "t", scalar $res->content_type,
           "l", scalar $res->content_length,
           "s", scalar $res->server);

        $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $t);
    }
}

#
# log url to file
#
sub log_to_file
{
    my ($nick, $target, $text) = @_;
    my ($lfile) = glob Irssi::settings_get_str("url_log_file");

    if ( open(LFD, ">>", $lfile) ) {

        my %h = {
            time => time,
            nick => $nick,
            target => $target,
            url => $text
        };

        print LFD expand(Irssi::settings_get_str("url_log_format"),
          "s", strftime(Irssi::settings_get_str("url_log_timestamp_format"), localtime),
          "n", $nick,
          "t", $target,
          "u", $text), "\n";

        close LFD;
    } else {
        print_msg "Warning: Unable to open file $lfile $!";
    }
}


#
# log url to database
#
sub log_to_database
{
    my ($nick, $target, $text) = @_;

    # this is quite expensive, but...
    my $dbh = DBI->connect(Irssi::settings_get_str("url_log_db_dsn"),
                           Irssi::settings_get_str("url_log_db_user"),
                           Irssi::settings_get_str("url_log_db_password"))
    or print_msg "Can't connect to database " . $DBI::errstr;

    if ($dbh) {

        my $sql = "INSERT INTO url (time, nick, target, url) VALUES (UNIX_TIMESTAMP()," .
          $dbh->quote($nick) . "," . $dbh->quote($target) . "," . $dbh->quote($text) . ")";

        $dbh->do($sql) or print_msg "Can't execute sql command: " . $DBI::errstr;

        $dbh->disconnect();
    }
}

#
# head command handler
#
sub sig_head
{
    my ($cmd_line, $server, $win_item) = @_;
    my @args = split(' ', $cmd_line);

    my $url;

    if (@args <= 0) {

        if ($#urls eq 0) {
            return;
        }

        $url = $urls[$#urls];
        $url =~ s/^.*?\s.*?\s//;
    } else {
        $url = lc(shift(@args));
    }

    do_head($url);
}

#
# msg handler
#
sub sig_msg
{
    my ($server, $data, $nick, $address) = @_;
    my ($target, $text) = split(/ :/, $data, 2);

    # very special, but better than just \w::/* and www.*
    while ($text =~ s#.*?(^|\s)(\w+?://.+?|[\w\.]{3,}/[\w~\.]+?(/|/\w+?\.\w+?))(\s|$)(.*)#$5#i) {

        return if ($1 =~ /^\.\./);

        push @urls, "$nick $target $2";

        # XXX resize correctly if delta is > 1
        if ($#urls >= Irssi::settings_get_int("url_log_size")) {
            shift @urls;
        }

        my $ischannel = $server->ischannel($target);
        my $level = $ischannel ? MSGLEVEL_PUBLIC : MSGLEVEL_MSGS;
        $target = $nick unless $ischannel;
        my $window = $server->window_find_closest($target, $level);

        if ( Irssi::settings_get_bool("url_log_auto_head") ) {
            do_auto_head($2, $window);
        }

        if ( Irssi::settings_get_bool("url_log") ) {
            log_to_file($nick, $target, $2);
        }

        if ( Irssi::settings_get_bool("url_log_db") ) {
            log_to_database($nick, $target, $2);
        }
    }
}

sub print_url_list_item
{
    my ($n, $data) = @_;
    my ($src, $dst, $url) = split(/ /, $data);

    Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_list', $n, $src, $dst, $url);
}

#
# url command handler
#
sub sig_url
{
    my ($cmd_line, $server, $win_item) = @_;
    my @args = split(' ', $cmd_line);

    if (@args <= 0) {
        print_msg "URL LIST [<nr>]       list all url(s)";
        print_msg "    OPEN [<nr>]       open url in browser";
        print_msg "    QUOTE [<nr>]      quote url (print to current channel)";
        print_msg "    HEAD              send HEAD to server";
        print_msg "    CLEAR             clear url list";
        return;
    }

    my $action = lc(shift(@args));

    if ($action eq "list") {

        if (@args > 0) {
            my $i = shift(@args);
            print_url_list_item($i, $urls[$i]);
        } else {
            my $i = 0;
            foreach my $l (@urls) {
                print_url_list_item($i, $l);
                $i++;
            }
        }

    } elsif($action eq "open") {

        my $i = $#urls;
        if (@args > 0) {
            $i = shift(@args);
        }
        open_url($urls[$i]);

    } elsif ($action eq "quote") {

        my $i = $#urls;
        if (@args > 0) {
            $i = shift(@args);
        }
        Irssi::active_win()->command("SAY URL: " . $urls[$i]);

    } elsif ($action eq "clear") {

        splice @urls;

    } elsif ($action eq "head") {

        my $i = $#urls;
        if (@args > 0) {
            $i = shift(@args);
        }
        my $url = $urls[$i];
        $url =~ s/^.*?\s.*?\s//;

        do_head($url);

    } else {
        print_msg "Unknown action";
    }
}

Irssi::command_bind('head', 'sig_head');
Irssi::command_bind('url', 'sig_url');
Irssi::command_bind('url list', 'sig_url');
Irssi::command_bind('url clear', 'sig_url');
Irssi::command_bind('url open', 'sig_url');
Irssi::command_bind('url quote', 'sig_url');
Irssi::command_bind('url head', 'sig_url');
Irssi::signal_add_first('event privmsg', 'sig_msg');

Irssi::settings_add_bool("url_log", "url_log", 1);
Irssi::settings_add_bool("url_log", "url_log_auto_head", 1);
Irssi::settings_add_bool("url_log", "url_log_db", 0);
Irssi::settings_add_str("url_log", "url_log_db_dsn", 'DBI:mysql:irc_url:localhost');
Irssi::settings_add_str("url_log", "url_log_db_user", 'irc_url');
Irssi::settings_add_str("url_log", "url_log_db_password", 'nada');
Irssi::settings_add_str("url_log", "url_log_file", "~/.irssi/url");
Irssi::settings_add_str("url_log", "url_log_timestamp_format", '%c');
Irssi::settings_add_str("url_log", "url_log_format", '%s %n %t %u');
Irssi::settings_add_str("url_log", "url_log_browser", 'galeon -n -x %f > /dev/null');
Irssi::settings_add_int("url_log", "url_log_size", 25);
Irssi::settings_add_str("url_log", "url_auto_head_format", '%c %t %l bytes');
Irssi::settings_add_str("url_log", "url_head_format", '
Content-Type: %t
Length:       %l bytes
Server:       %s');


Irssi::theme_register(['url_head', '[%gHTTP Head%n %g$0%n]$1-',
                       'url_auto_head', '[%gHEAD%n] $0-',
                       'url_list', '[$0] $1 %W$2%n $3-']);

# vim:set ts=4 sw=4 expandtab:
