#!/usr/local/bin/perl
# suspect.pl
# Feed URLs to Google with SafeSearch filtering turned on. If an inurl:
# query returns results, the URL probably isn't questionable content. If
# it returns no results, the URL either points at questionable content or
# isn't in the Google index at all; a second, unfiltered inurl: query
# tells those two cases apart.
use strict;
use warnings;
use Scalar::Util qw(reftype);
use SOAP::Lite;

# Your Google API developer's key.
my $google_key = 'put your key here';

# Location of the GoogleSearch WSDL file.
my $google_wsdl = "./GoogleSearch.wsdl";

$| = 1;    # turn off output buffering so each CSV row appears immediately

# SOAP client generated from the local WSDL description.
my $google_search = SOAP::Lite->service("file:$google_wsdl");

# url_is_listed($url, $safe_search)
#
# Runs an "inurl:$url" query and reports whether any returned result has a
# URL that contains $url as a literal substring.
#
#   $url         - text to look for, already stripped of scheme and "www."
#   $safe_search - "true" or "false"; passed straight through as the
#                  search call's SafeSearch argument
#
# Returns 1 when a matching result came back, 0 otherwise (including when
# the response carries no usable result list).
sub url_is_listed {
    my ($url, $safe_search) = @_;

    my $results = $google_search->doGoogleSearch(
        $google_key, "inurl:$url", 0, 10, "false", "", $safe_search,
        "", "latin1", "latin1"
    );

    # reftype() looks at the underlying data type, so the checks below
    # accept both plain and blessed hashes/arrays.
    return 0 unless (reftype($results) || '') eq 'HASH';
    my $elements = $results->{resultElements};
    return 0 unless (reftype($elements) || '') eq 'ARRAY';

    for my $element (@{$elements}) {
        next unless (reftype($element) || '') eq 'HASH';
        my $found_url = $element->{URL};
        next unless defined $found_url;

        # Literal substring test: the URL must not be treated as a regex,
        # since ".", "?", "+", "(" and friends are common in URLs.
        return 1 if index($found_url, $url) >= 0;
    }

    return 0;
}

# CSV header
# NOTE(review): the header names a "title" column, but no row ever fills
# it; the header is kept unchanged so existing consumers see the same output.
print qq{"url","safe/suspect/unindexed","title"\n};

# One URL per input line (files named on the command line, or STDIN).
while (my $url = <>) {
    chomp $url;

    # Drop surrounding whitespace (including a stray CR from CRLF input).
    $url =~ s/\A\s+//;
    $url =~ s/\s+\z//;

    # Reduce the URL to the part worth searching for: no scheme, no "www.".
    $url =~ s!\A\w+?://!!;
    $url =~ s!\Awww\.!!;

    # Nothing left to search for (blank line, or only a scheme/prefix).
    next if $url eq '';

    # First ask with SafeSearch on; only if that finds nothing, ask again
    # with it off to tell filtered ("suspect") from absent ("unindexed").
    my $status = url_is_listed($url, "true")  ? 'safe'
               : url_is_listed($url, "false") ? 'suspect'
               :                                'unindexed';

    # Double any embedded quotes so the row stays valid CSV.
    (my $csv_url = $url) =~ s/"/""/g;

    print qq{"$csv_url","$status"\n};
}