#!/usr/bin/perl
#
# usage: ./google-query [term]
#
# Does a google search for [term], fetches every non-google page returned,
# and follows through to google's further result pages. Note that the script
# doesn't escape [term], so you have to do that part yourself.
# example: ./google-query sid+%26+nancy
# is a search for 'sid & nancy'
#
# written by francisco, http://www.blackant.net/
#
# From my understanding of google's Terms of Service, I don't think personal
# use of this script violates those terms, but I'm no lawyer, so use at your
# own peril.

use strict;
use warnings;

use HTML::Parser;
use LWP::UserAgent;
use LWP::Simple;

# Unbuffered output so the progress lines show up as they happen.
$| = 1;

#when i wrote this, my website was the first return for this search.
my $query = shift || 'porn+junkie';

#increase to be more polite, decrease to be ruder
my $sleep = 1;

my $base    = 'http://www.google.com';
my $subbase = '/search?num=100&q=';
my $url     = $base . $subbase . $query;

# Absolute URLs of every google result page already requested.
my @followed = ();

my $req = HTTP::Request->new('GET', $url);
my $ua  = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0');

#run through each page of google search returns, hopefully
do {
    # Start-tag events for THIS page only. Keeping the array inside the loop
    # stops links from earlier pages being processed (and fetched) again.
    my @pstuff = ();
    my $p = HTML::Parser->new(
        api_version     => 3,
        start_h         => [\@pstuff, 'tagname, attr'],
        marked_sections => 1,
    );

    my $res = $ua->request($req);
    if ($res->is_success) {
        $p->parse($res->content);
        $p->eof;    # flush whatever the parser is still buffering
    }
    else {
        warn "request failed: ", $url, ": ", $res->status_line, $/;
    }

    push @followed, $url;
    print "parsing: ", $url, $/;

    LINKS:
    foreach my $v (@pstuff) {
        my ($tagname, $attr) = @{$v};

        # we only deal with <a> tags ...
        next LINKS unless lc($tagname) eq 'a';

        # ... or more specifically, with <a href="..."> tags
        my $found = $attr->{'href'};
        next LINKS unless defined $found;

        #a google page (relative link)
        if ($found !~ m{^https?://}i) {
            next LINKS unless $found =~ m{^/search};

            # @followed holds absolute URLs, so compare the absolute form.
            my $candidate = $base . $found;
            next LINKS if in_array($candidate, \@followed);

            # The last not-yet-followed result page on this page wins.
            $url = $candidate;
            $req = HTTP::Request->new('GET', $url);
        }
        #a non google page
        elsif ($found !~ /\.google\.com/) {
            print "getting: ", $found, $/;

            #if you want to do something with the data you
            #get, do it here - e.g. my $data = get($found)
            #and then parse $data for whatever.
            get($found);
            sleep $sleep;
        }
    }

    # If no new result page was found, $url is still the page just followed
    # and the loop stops.
} while (!in_array($url, \@followed));

# in_array($link, \@array)
# Returns 1 if $link is string-equal (eq) to any element of the referenced
# array, 0 otherwise.
sub in_array {
    my ($link, $array) = @_;

    foreach my $val (@{$array}) {
        return 1 if $val eq $link;
    }

    return 0;
}