Wednesday, October 8, 2008

Parsing HTML to find Links

// Page whose markup is scanned for link and image addresses.
$url = "http://in.php.net/manual/en/function.stream-context-create.php";

// Fetch the raw HTML. The warning is suppressed because failure is reported
// explicitly below; compare against false so that an empty (but successfully
// fetched) document is not mistaken for an error.
$input = @file_get_contents($url);
if ($input === false) {
    // Double quotes so the failing address is actually shown in the message.
    die("Could not access file: $url");
}

// Modifiers: s = dot matches newlines, i = case-insensitive,
// U = quantifier greediness inverted (so "(.*)" stops at the first "</a>").
//
// Anchor tags: group 2 = href value, group 3 = link text (may contain HTML).
$linkRegexp = '/<a\s[^>]*href=("??)([^" >]*?)\1[^>]*>(.*)<\/a>/siU';
// Image tags: group 2 = src value. The pattern ends at the tag's own ">" so
// that an immediately following <img> is not consumed by this match.
$imageRegexp = '/<img\s[^>]*src=("??)([^" >]*?)\1[^>]*>/siU';

if (preg_match_all($linkRegexp, $input, $matches)) {
    // $matches[2] = array of link addresses
    // $matches[3] = array of link text - including HTML code
    print_r($matches[2]);
}

if (preg_match_all($imageRegexp, $input, $matches)) {
    // $matches[2] = array of image source addresses
    print_r($matches[2]);
}

No comments: