#!/usr/bin/env perl

#
# unique-domain-builder.pl
#
#   (C) 2010 José Miguel Parrella Romero
#   This is free software, distributed under the same terms of Perl.
#

use strict;

use Yahoo::Search;
use Domain::PublicSuffix;

my $n;       # Total number of results
my $i = 0;   # Current search result
my $c = 100; # Maximum search results size

my $cctld   = $ARGV[0] || 've';
my $query   = "site:gob.$cctld OR site:gov.$cctld";
my %options = ( AppId => 'UDB 0.1',
                Type => 'any', AllowSimilar => 0, Language => undef );

# Optional list of prefixes for the base domain results
my @prefixes = qw/ipv6 6 www6 w6 www.v6 www.ipv6/;

# Get the search results count
my $search = Yahoo::Search->new();
my $req    = $search->Request( Doc => $query, %options, Start => 0, Count => 1 );
my $res    = $req->Fetch();
   $n      = $res->CountAvail;

# Initialize D::PS
my $d = new Domain::PublicSuffix({'data_file'=>'./suffices'});

my %domains; # List of unique domains

while ( $i < $n ) {
  # Build an iterative query
  my @s = Yahoo::Search->Results(
	Doc   => $query,
	Start => $i,
	Count => $c,
	%options
  );
  exit $@ if $@; # we exit if we have an error due to Yahoo! returning 400 when limit is reached

  my %f; # list of unique domains per-iteration

  foreach (@s) { # a search result
	++$i;
	my $u = $_->Url;
           $u =~ s#^http://##; # stripping stuff...
	   $u =~ s#/.+##;      # then some more...
	   $u =~ s#/##;        # and... done!

	my $b = $d->get_root_domain($u); # D::PS does its work

	unless ($domains{$b}) { # domain isn't registered
		$domains{$b} = $_->Title; # store it in the global list
		$f{$b} = $_->Title;       # store it in the session list
	}
  }

  foreach my $e ( keys %f ) { # session list of unique domains
	print "$e\n";
	foreach my $p ( @prefixes ) { # prefixes
		print "$p.$e\n";
	}
  }
}
