html/gsi.pl


   1 #!/usr/bin/perl -w
   2 # <<< MISSION STATEMENT >>>
   3 #
   4 #  gsi.pl
   5 # Looks up an 8 digit number in the Norwegian yellowpages...
   6 # ( http://www.gulesider.no/ )
   7 #
   8 # Prints information after removing identical entries.
   9 # Written by <mistr@sensewave.com> for irssi 0.8.9
  10 #
  11 # TODO:
  12 #  - enhance the regexes (less stripping, better matching)
  13 #  - shrink code (more generalized subs)
  14 #  - add functionality for name and address lookups
  15 #
  16 # <<< BEGING CODE >>>
  17 use strict;
  18 use LWP::UserAgent;
  19 use URI::Heuristic;
  20 use vars qw($VERSION %IRSSI);
  21 
  22 $VERSION = "220904-04:30:00";
  23 
  24 my %IRSSI = (
  25     authors     => "mistr",
  26     contact     => "mistr\@sensewave.com",
  27     name        => "gsi",
  28     modules     => 'LWP::UserAgent, URI::Heuristic',
  29     description => "/gsi <phone nr> checks number via http://gulesider.no. Norwegian 8-digit numbers only. Nice if you have caller-ID and are as paranoid as me.",
  30     license     => "Public Domain",
  31     url         => "http://irssi.org/scripts",
  32     changed     => "$VERSION"
  33 );
  34 # No need to change
  35 my $owner = "mistr.atat.sensewave.dotdot.com";
  36 my $banner = "[http://gulesider.no]";
  37 # Don't touch
  38 Irssi::settings_add_bool('gsi', 'gsi_debug', 0);
  39 Irssi::print("Set gsi_debug ON for debugging output");
  40 Irssi::command_bind('gsi', 'cmd_gsi');
  41 Irssi::print("Added command /gsi");
  42 
  43 # Subs
  44 sub cmd_gsi {
  45 	my $debug = Irssi::settings_get_bool('gsi_debug');
  46 	undef $debug unless ( $debug == 1 ) ;
  47   	my ($lookup,$server,$witem) = @_;
  48 	$lookup =~ s/\s+//g;
  49         if ( $lookup =~ m/^([0-9]{8}?)$/ ) {
  50 	  	$lookup = $1;
  51 	} else {
  52 		print CLIENTCRAP "%R>>%n Syntax error. Use /gsi <8digitnumber>";
  53 	  	return;
  54 	}
  55 	print CLIENTCRAP "%R>>%n Looking up $lookup";
  56 	my $address = "http://www.gulesider.no/gsi/numberSearch.do?tel=";
  57 	$address .= $lookup;
  58 	chomp(my $raw_url = $address);
  59 	my $url = URI::Heuristic::uf_urlstr($raw_url);
  60 	my $ua = LWP::UserAgent->new();
  61 	$ua->agent("$owner");
  62 	my $req = HTTP::Request->new(GET => $url);
  63 	$req->referer("$owner");
  64 	my $response = $ua->request($req);
  65 	if ($response->is_error()) {
  66 		print CLIENTCRAP "%R>>%n Something went wrong fetching by HTTP";
  67 		return;
  68 	} else {
  69        		my $rawdata = $response->content(); # get the data
  70 		$_ = $rawdata;
  71 		if ( m/0 treff\./s ) {
  72 			print CLIENTCRAP "%R>>%n $banner No hits.";
  73 			undef $lookup;
  74 			return;
  75 		} elsif ( /S\&oslash\;ket\ ga\ treff\ i(.*)Gule Sider(.*)og(.*)Telefonkatalogen(.*)/ms ) {
  76 			print CLIENTCRAP "%R>>%n $banner Multiple listings. Manual search needed.";
  77 			print CLIENTCRAP "%R>>%n \($address\)";
  78 			undef $lookup;
  79 			return;
  80 		}
  81 		my $result = codezap( $rawdata );
  82 		( $debug ) && Irssi::print("debug - $result");
  83 		$_ = $result;
  84 		if ( /\([0-9]+ treff\)(.*)function\ submitDrill\(select\)/ ) { # multiple hits
  85 			my $rest = $1;
  86 			( $debug ) && Irssi::print("debug - MULTIPLE HITS");
  87 			$rest =~ s/[vV]is.treffene.i.kart//g;
  88 			$rest =~ s/[Tt]reff.i.+\(\d+.treff\)//g;
  89 			my ($result, %sorted);
  90 			while ($_ = $rest) {
  91 				m/^[ ]*(.+?)\ (\d{2,}[\d ]+\d{2,3})[ ]+/;
  92 				my $info = $1;
  93 				my $number = $2;
  94 				$rest = $';
  95 				( $debug ) && Irssi::print("debug - $info - $number");
  96 				$result = $info . " " . $number;
  97 				$sorted{$result}++;
  98 			}
  99 			foreach $result (sort keys %sorted) {
 100 				print CLIENTCRAP "%R>>%n $banner $result";
 101 			}
 102 			undef $lookup;
 103 			return;
 104 		} elsif ( m/.*totalt 1 treff\. (.+) ([\d ]+) (.*[a-z-_.+=]+\@[a-z-_.+=]+\..+? )?Send.*/ ) {
 105 			( $debug ) && Irssi::print("debug - 1 HIT STANDARD");
 106 			my $info = $1;
 107 			my $number = $2;
 108 			my $other = $3;
 109 			if ( $other =~ m/\w{3,}/ ) { $number .= " " . $other; }
 110 			$info =~ s/[Ss]e ogs.+? [A-Z ]+[A-Z]{2,} //;
 111 			$result = splitwords( $info ); 
 112 			$result .= " $number"
 113 		} elsif ( /.*treffene i kart (.*) ([\d ]+) (.*[a-z-_.+=]+\@[a-z-_.+=]+\..+? )?\'\)\;/) {
 114 			( $debug ) && Irssi::print("debug - 1 HIT OTHER");
 115 			my $info = $1;
 116 			my $number = $2;
 117 			my $other = $3;
 118 			if ( $other =~ m/\w{3,}/ ) { $number .= " " . $other; }
 119 			$result = splitwords( $info ); 
 120 			$result .= " $number"
 121 		} else {
 122 			( $debug ) && Irssi::print("debug - FAILED REGEX");
 123 			$result = "Unrecognized reply from server";
 124 		}
 125 		print CLIENTCRAP "%R>>%n $banner $result";
 126 		undef $lookup;
 127 		return;
 128 	}
 129 }	
 130 
 131 sub codezap {
 132         my $zap = join('', @_);
 133 	$zap =~ s/\&nbsp\;?//g;
 134 	$zap =~ s/\&amp\;?/\&/g;
 135 	$zap =~ s/\<.+?\>/ /msg;
 136 	$zap =~ s/\s+/ /mg;
 137 	$zap =~ s/ +/ /mg;
 138 	$zap =~ s/^ +$//mg;
 139 	return "$zap";
 140 }
 141 
 142 sub splitwords {
 143         my $workload = join('', @_);
 144 	my @result;
 145 	foreach ( split(' ', $workload) ) {
 146 		if (m/([A-Z][^A-Z ]+)([A-Z][^A-Z ]+)/) {
 147 			push(@result, $1 . " " . $2);
 148 		} else {
 149 			push(@result, $_);
 150 		}
 151 	}
 152 	return join(' ', @result);
 153 }
 154