html/url_log.pl
1 # url grabber, yes it sucks
2 #
3 # infected with the gpl virus
4 #
5 # Thomas Graf <tgraf@europe.com>
6 #
7 # version: 0.2
8 #
9 # Commands:
10 #
11 # /URL LIST
12 # /URL CLEAR
13 # /URL OPEN [<nr>]
14 # /URL QUOTE [<nr>]
15 # /URL HEAD [<nr>] !! Blocking !!
16 # /HEAD <url> !! Blocking !!
17 #
18 # Config Values
19 #
20 # [url logfile]
21 # url_log log urls to url_log_file
22 # url_log_file file to save urls
23 # url_log_format format in url logfile
24 # url_log_timestamp format of timestamp in url logfile
25 #
26 # [url log in memory]
27 # url_log_browser command to execute to open url, %f will be replaced with the url
28 # url_log_size keep that many urls in the list
29 #
30 # [http head stuff]
31 # url_head_format format of HEAD output
32 # url_auto_head do a head on every url received
33 # url_auto_head_format format of auto head output
34 #
35 #
36 # Database installation
37 # - create database and user
38 # - create table url ( id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,
39 # time INT UNSIGNED, nick VARCHAR(25), target VARCHAR(25), url VARCHAR(255));
40 # or similiar :)
41 #
42 #
43 # todo:
44 #
45 # - fix XXX marks
46 # - xml output?
47 # - don't output "bytes" if content-length is not available
48 # - prefix with http:// if no prefix is given
49
50 use Irssi;
51 use Irssi::Irc;
52
53 $VERSION = "0.2";
54 %IRSSI = (
55 authors => 'Thomas Graf',
56 contact => 'irssi@reeler.org',
57 name => 'url_log',
58 description => 'logs urls to textfile or/and database, able to list, quote, open or `http head` saved urls.',
59 license => 'GNU GPLv2 or later',
60 url => 'http://irssi.reeler.org/url/',
61 );
62
63 use LWP;
64 use LWP::UserAgent;
65 use HTTP::Status;
66 use DBI;
67
68 use POSIX qw(strftime);
69
70 use strict;
71
72 my @urls;
73 my $user_agent = new LWP::UserAgent;
74
75 $user_agent->agent("IrssiUrlLog/0.2");
76
77 # hmm... stolen..
78 # -verbatim- import expand
79 sub expand {
80 my ($string, %format) = @_;
81 my ($exp, $repl);
82 $string =~ s/%$exp/$repl/g while (($exp, $repl) = each(%format));
83 return $string;
84 }
85 # -verbatim- end
86
87 sub print_msg
88 {
89 Irssi::active_win()->print("@_");
90 }
91
92 #
93 # open url in brower using url_log_brower command
94 #
95 sub open_url
96 {
97 my ($data) = @_;
98
99 my ($nick, $target, $url) = split(/ /, $data);
100
101 my $pid = fork();
102
103 if ($pid) {
104 Irssi::pidwait_add($pid);
105 } elsif (defined $pid) { # $pid is zero here if defined
106 my $data = expand(Irssi::settings_get_str("url_log_browser"), "f", $url);
107 # XXX use exec
108 system $data;
109 exit;
110 } else {
111 # weird fork error
112 print_msg "Can't fork: $!";
113 }
114 }
115
116 sub head
117 {
118 my ($url) = @_;
119 my $req = new HTTP::Request HEAD => $url;
120 my $res = $user_agent->request($req);
121 return $res;
122 }
123
124 #
125 # do a HEAD
126 #
127 sub do_head
128 {
129 my ($url) = @_;
130
131 my $res = head($url);
132
133 if ($res->code ne RC_OK) {
134 Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, "\n" .
135 $res->status_line());
136 } else {
137
138 my $t = expand(Irssi::settings_get_str("url_head_format"),
139 "u", $url,
140 "t", scalar $res->content_type,
141 "l", scalar $res->content_length,
142 "s", scalar $res->server);
143
144 Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_head', $url, $t);
145 }
146 }
147
148 #
149 # called if url is detected, should do a HEAD and print a 1-liner
150 #
151 sub do_auto_head
152 {
153 my ($url, $window) = @_;
154
155 return if ($url !~ /^http:\/\//);
156
157 my $res = head($url);
158
159 if ($res->code ne RC_OK) {
160 $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $res->status_line());
161 } else {
162
163 my $t = expand(Irssi::settings_get_str("url_auto_head_format"),
164 "u", $url,
165 "c", $res->code,
166 "t", scalar $res->content_type,
167 "l", scalar $res->content_length,
168 "s", scalar $res->server);
169
170 $window->printformat(MSGLEVEL_CRAP, 'url_auto_head', $t);
171 }
172 }
173
174 #
175 # log url to file
176 #
177 sub log_to_file
178 {
179 my ($nick, $target, $text) = @_;
180 my ($lfile) = glob Irssi::settings_get_str("url_log_file");
181
182 if ( open(LFD, ">> $lfile") ) {
183
184 my %h = {
185 time => time,
186 nick => $nick,
187 target => $target,
188 url => $text
189 };
190
191 print LFD expand(Irssi::settings_get_str("url_log_format"),
192 "s", strftime(Irssi::settings_get_str("url_log_timestamp_format"), localtime),
193 "n", $nick,
194 "t", $target,
195 "u", $text), "\n";
196
197 close LFD;
198 } else {
199 print_msg "Warning: Unable to open file $lfile $!";
200 }
201 }
202
203
204 #
205 # log url to database
206 #
207 sub log_to_database
208 {
209 my ($nick, $target, $text) = @_;
210
211 # this is quite expensive, but...
212 my $dbh = DBI->connect(Irssi::settings_get_str("url_log_db_dsn"),
213 Irssi::settings_get_str("url_log_db_user"),
214 Irssi::settings_get_str("url_log_db_password"))
215 or print_msg "Can't connect to database " . $DBI::errstr;
216
217 if ($dbh) {
218
219 my $sql = "INSERT INTO url (time, nick, target, url) VALUES (UNIX_TIMESTAMP()," .
220 $dbh->quote($nick) . "," . $dbh->quote($target) . "," . $dbh->quote($text) . ")";
221
222 $dbh->do($sql) or print_msg "Can't execute sql command: " . $DBI::errstr;
223
224 $dbh->disconnect();
225 }
226 }
227
228 #
229 # head command handler
230 #
231 sub sig_head
232 {
233 my ($cmd_line, $server, $win_item) = @_;
234 my @args = split(' ', $cmd_line);
235
236 my $url;
237
238 if (@args <= 0) {
239
240 if ($#urls eq 0) {
241 return;
242 }
243
244 $url = $urls[$#urls];
245 $url =~ s/^.*?\s.*?\s//;
246 } else {
247 $url = lc(shift(@args));
248 }
249
250 do_head($url);
251 }
252
253 #
254 # msg handler
255 #
256 sub sig_msg
257 {
258 my ($server, $data, $nick, $address) = @_;
259 my ($target, $text) = split(/ :/, $data, 2);
260
261 # very special, but better than just \w::/* and www.*
262 while ($text =~ s#.*?(^|\s)(\w+?://.+?|[\w\.]{3,}/[\w~\.]+?(/|/\w+?\.\w+?))(\s|$)(.*)#$5#i) {
263
264 return if ($1 =~ /^\.\./);
265
266 push @urls, "$nick $target $2";
267
268 # XXX resize correctly if delta is > 1
269 if ($#urls >= Irssi::settings_get_int("url_log_size")) {
270 shift @urls;
271 }
272
273 my $ischannel = $server->ischannel($target);
274 my $level = $ischannel ? MSGLEVEL_PUBLIC : MSGLEVEL_MSGS;
275 $target = $nick unless $ischannel;
276 my $window = $server->window_find_closest($target, $level);
277
278 if ( Irssi::settings_get_bool("url_log_auto_head") ) {
279 do_auto_head($2, $window);
280 }
281
282 if ( Irssi::settings_get_bool("url_log") ) {
283 log_to_file($nick, $target, $2);
284 }
285
286 if ( Irssi::settings_get_bool("url_log_db") ) {
287 log_to_database($nick, $target, $2);
288 }
289 }
290 }
291
292 sub print_url_list_item
293 {
294 my ($n, $data) = @_;
295 my ($src, $dst, $url) = split(/ /, $data);
296
297 Irssi::active_win()->printformat(MSGLEVEL_CRAP, 'url_list', $n, $src, $dst, $url);
298 }
299
300 #
301 # url command handler
302 #
303 sub sig_url
304 {
305 my ($cmd_line, $server, $win_item) = @_;
306 my @args = split(' ', $cmd_line);
307
308 if (@args <= 0) {
309 print_msg "URL LIST [<nr>] list all url(s)";
310 print_msg " OPEN [<nr>] open url in browser";
311 print_msg " QUOTE [<nr>] quote url (print to current channel)";
312 print_msg " HEAD send HEAD to server";
313 print_msg " CLEAR clear url list";
314 return;
315 }
316
317 my $action = lc(shift(@args));
318
319 if ($action eq "list") {
320
321 if (@args > 0) {
322 my $i = shift(@args);
323 print_url_list_item($i, $urls[$i]);
324 } else {
325 my $i = 0;
326 foreach my $l (@urls) {
327 print_url_list_item($i, $l);
328 $i++;
329 }
330 }
331
332 } elsif($action eq "open") {
333
334 my $i = $#urls;
335 if (@args > 0) {
336 $i = shift(@args);
337 }
338 open_url($urls[$i]);
339
340 } elsif ($action eq "quote") {
341
342 my $i = $#urls;
343 if (@args > 0) {
344 $i = shift(@args);
345 }
346 Irssi::active_win()->command("SAY URL: " . $urls[$i]);
347
348 } elsif ($action eq "clear") {
349
350 splice @urls;
351
352 } elsif ($action eq "head") {
353
354 my $i = $#urls;
355 if (@args > 0) {
356 $i = shift(@args);
357 }
358 my $url = $urls[$i];
359 $url =~ s/^.*?\s.*?\s//;
360
361 do_head($url);
362
363 } else {
364 print_msg "Unknown action";
365 }
366 }
367
368
369 Irssi::command_bind('head', 'sig_head');
370 Irssi::command_bind('url', 'sig_url');
371 Irssi::signal_add_first('event privmsg', 'sig_msg');
372
373 Irssi::settings_add_bool("url_log", "url_log", 1);
374 Irssi::settings_add_bool("url_log", "url_log_auto_head", 1);
375 Irssi::settings_add_bool("url_log", "url_log_db", 0);
376 Irssi::settings_add_str("url_log", "url_log_db_dsn", 'DBI:mysql:irc_url:localhost');
377 Irssi::settings_add_str("url_log", "url_log_db_user", 'irc_url');
378 Irssi::settings_add_str("url_log", "url_log_db_password", 'nada');
379 Irssi::settings_add_str("url_log", "url_log_file", "~/.irssi/url");
380 Irssi::settings_add_str("url_log", "url_log_timestamp_format", '%c');
381 Irssi::settings_add_str("url_log", "url_log_format", '%s %n %t %u');
382 Irssi::settings_add_str("url_log", "url_log_browser", 'galeon -n -x %f > /dev/null');
383 Irssi::settings_add_int("url_log", "url_log_size", 25);
384 Irssi::settings_add_str("url_log", "url_auto_head_format", '%c %t %l bytes');
385 Irssi::settings_add_str("url_log", "url_head_format", '
386 Content-Type: %t
387 Length: %l bytes
388 Server: %s');
389
390
391 Irssi::theme_register(['url_head', '[%gHTTP Head%n %g$0%n]$1-',
392 'url_auto_head', '[%gHEAD%n] $0-',
393 'url_list', '[$0] $1 %W$2%n $3-']);