#!/usr/bin/perl

# find_domains.pl -- print the domain names referenced by src or href attributes within an HTML file

use strict;
use warnings;

# extract_domains($text)
#
# Scans $text for quoted src= / href= attribute values and returns, in order
# of appearance, the host part of each value that looks like it names a host
# (i.e. whose first path segment contains a dot).
#
#   $text    - a line (or larger chunk) of HTML; undef yields an empty list.
#   returns  - list of host strings, possibly empty.  A ":port" suffix or
#              anything else preceding the first "/" is kept as written.
#
# Values without a scheme are still treated as "host first" (for example
# "www.example.com/page"), so a bare relative file name such as "logo.png"
# is reported too; a dotted name deeper in a path ("images/logo.png") is not.
sub extract_domains {
    my ($text) = @_;
    return unless defined $text;

    my @domains;

    # Attribute values may not contain either quote character.  /g resumes
    # the search right after the closing quote of the previous match.
    while ($text =~ /(?:src|href)\s*=\s*['"]([^'"]+)['"]/gi) {
        my $target = $1;

        # Remove a real "scheme:/" prefix or a protocol-relative "//".
        # A character class such as [htp:/]* must not be used here: it would
        # also swallow leading h/t/p letters of the host itself, turning
        # "http://top.example.com" into "op.example.com".
        $target =~ s{\A(?:[a-z][a-z0-9+.\-]*:/+|//)}{}i;

        # Anchored: only the first segment can be the host, so file names
        # further down a path are never mistaken for domains.
        if ($target =~ m{\A([^/]+\.[^/]+)}) {
            push @domains, $1;
        }
    }

    return @domains;
}

# print_domains($path)
#
# Reads the file at $path line by line and prints every host found by
# extract_domains(), one per line, to standard output.  Dies with a
# diagnostic if the file cannot be opened.
sub print_domains {
    my ($path) = @_;

    # Three-argument open with a lexical handle: the file name can no longer
    # be interpreted as a mode or a command.
    open(my $html, '<', $path)
        or die "$0: cannot open '$path': $!\n";

    while (my $line = <$html>) {
        print "$_\n" for extract_domains($line);
    }

    close($html);
    return;
}

# Run only when executed as a script, so the file can also be loaded with
# require/do to reuse extract_domains().
unless (caller) {
    if (@ARGV) {
        print_domains($ARGV[0]);
    }
    else {
        # A missing argument is reported but, as before, is not fatal.
        warn "usage: $0 FILE\n";
    }
}

