html/gsi.pl
1 #!/usr/bin/perl -w
2 # <<< MISSION STATEMENT >>>
3 #
4 # gsi.pl
5 # Looks up an 8 digit number in the Norwegian yellowpages...
6 # ( http://www.gulesider.no/ )
7 #
8 # Prints information after removing identical entries.
9 # Written by <mistr@sensewave.com> for irssi 0.8.9
10 #
11 # TODO:
12 # - enhance the regexes (less stripping, better matching)
13 # - shrink code (more generalized subs)
14 # - add functionality for name and address lookups
15 #
16 # <<< BEGING CODE >>>
17 use strict;
18 use LWP::UserAgent;
19 use URI::Heuristic;
20 use vars qw($VERSION %IRSSI);
21
22 $VERSION = "220904-04:30:00";
23
24 my %IRSSI = (
25 authors => "mistr",
26 contact => "mistr\@sensewave.com",
27 name => "gsi",
28 modules => 'LWP::UserAgent, URI::Heuristic',
29 description => "/gsi <phone nr> checks number via http://gulesider.no. Norwegian 8-digit numbers only. Nice if you have caller-ID and are as paranoid as me.",
30 license => "Public Domain",
31 url => "http://irssi.org/scripts",
32 changed => "$VERSION"
33 );
34 # No need to change
35 my $owner = "mistr.atat.sensewave.dotdot.com";
36 my $banner = "[http://gulesider.no]";
37 # Don't touch
38 Irssi::settings_add_bool('gsi', 'gsi_debug', 0);
39 Irssi::print("Set gsi_debug ON for debugging output");
40 Irssi::command_bind('gsi', 'cmd_gsi');
41 Irssi::print("Added command /gsi");
42
43 # Subs
44 sub cmd_gsi {
45 my $debug = Irssi::settings_get_bool('gsi_debug');
46 undef $debug unless ( $debug == 1 ) ;
47 my ($lookup,$server,$witem) = @_;
48 $lookup =~ s/\s+//g;
49 if ( $lookup =~ m/^([0-9]{8}?)$/ ) {
50 $lookup = $1;
51 } else {
52 print CLIENTCRAP "%R>>%n Syntax error. Use /gsi <8digitnumber>";
53 return;
54 }
55 print CLIENTCRAP "%R>>%n Looking up $lookup";
56 my $address = "http://www.gulesider.no/gsi/numberSearch.do?tel=";
57 $address .= $lookup;
58 chomp(my $raw_url = $address);
59 my $url = URI::Heuristic::uf_urlstr($raw_url);
60 my $ua = LWP::UserAgent->new();
61 $ua->agent("$owner");
62 my $req = HTTP::Request->new(GET => $url);
63 $req->referer("$owner");
64 my $response = $ua->request($req);
65 if ($response->is_error()) {
66 print CLIENTCRAP "%R>>%n Something went wrong fetching by HTTP";
67 return;
68 } else {
69 my $rawdata = $response->content(); # get the data
70 $_ = $rawdata;
71 if ( m/0 treff\./s ) {
72 print CLIENTCRAP "%R>>%n $banner No hits.";
73 undef $lookup;
74 return;
75 } elsif ( /S\ø\;ket\ ga\ treff\ i(.*)Gule Sider(.*)og(.*)Telefonkatalogen(.*)/ms ) {
76 print CLIENTCRAP "%R>>%n $banner Multiple listings. Manual search needed.";
77 print CLIENTCRAP "%R>>%n \($address\)";
78 undef $lookup;
79 return;
80 }
81 my $result = codezap( $rawdata );
82 ( $debug ) && Irssi::print("debug - $result");
83 $_ = $result;
84 if ( /\([0-9]+ treff\)(.*)function\ submitDrill\(select\)/ ) { # multiple hits
85 my $rest = $1;
86 ( $debug ) && Irssi::print("debug - MULTIPLE HITS");
87 $rest =~ s/[vV]is.treffene.i.kart//g;
88 $rest =~ s/[Tt]reff.i.+\(\d+.treff\)//g;
89 my ($result, %sorted);
90 while ($_ = $rest) {
91 m/^[ ]*(.+?)\ (\d{2,}[\d ]+\d{2,3})[ ]+/;
92 my $info = $1;
93 my $number = $2;
94 $rest = $';
95 ( $debug ) && Irssi::print("debug - $info - $number");
96 $result = $info . " " . $number;
97 $sorted{$result}++;
98 }
99 foreach $result (sort keys %sorted) {
100 print CLIENTCRAP "%R>>%n $banner $result";
101 }
102 undef $lookup;
103 return;
104 } elsif ( m/.*totalt 1 treff\. (.+) ([\d ]+) (.*[a-z-_.+=]+\@[a-z-_.+=]+\..+? )?Send.*/ ) {
105 ( $debug ) && Irssi::print("debug - 1 HIT STANDARD");
106 my $info = $1;
107 my $number = $2;
108 my $other = $3;
109 if ( $other =~ m/\w{3,}/ ) { $number .= " " . $other; }
110 $info =~ s/[Ss]e ogs.+? [A-Z ]+[A-Z]{2,} //;
111 $result = splitwords( $info );
112 $result .= " $number"
113 } elsif ( /.*treffene i kart (.*) ([\d ]+) (.*[a-z-_.+=]+\@[a-z-_.+=]+\..+? )?\'\)\;/) {
114 ( $debug ) && Irssi::print("debug - 1 HIT OTHER");
115 my $info = $1;
116 my $number = $2;
117 my $other = $3;
118 if ( $other =~ m/\w{3,}/ ) { $number .= " " . $other; }
119 $result = splitwords( $info );
120 $result .= " $number"
121 } else {
122 ( $debug ) && Irssi::print("debug - FAILED REGEX");
123 $result = "Unrecognized reply from server";
124 }
125 print CLIENTCRAP "%R>>%n $banner $result";
126 undef $lookup;
127 return;
128 }
129 }
130
131 sub codezap {
132 my $zap = join('', @_);
133 $zap =~ s/\ \;?//g;
134 $zap =~ s/\&\;?/\&/g;
135 $zap =~ s/\<.+?\>/ /msg;
136 $zap =~ s/\s+/ /mg;
137 $zap =~ s/ +/ /mg;
138 $zap =~ s/^ +$//mg;
139 return "$zap";
140 }
141
142 sub splitwords {
143 my $workload = join('', @_);
144 my @result;
145 foreach ( split(' ', $workload) ) {
146 if (m/([A-Z][^A-Z ]+)([A-Z][^A-Z ]+)/) {
147 push(@result, $1 . " " . $2);
148 } else {
149 push(@result, $_);
150 }
151 }
152 return join(' ', @result);
153 }
154