",$tmp);
if ($tmp2 === false) { return false; }
$row = substr($rawHTML,$tmp,$tmp2-$tmp);
$pattern = "/| | | | | ",$tmp);
if ($tmp === false) { return false; }
$tmp++;
$tmp2 = strpos(strtoupper($row),"",$idx)+5;
$rowIdx++;
/* Now parse the rest of the rows. */
$tmp = strpos($upperHTML," | ",$idx);
if ($tmp2 === false) { return false; }
$table = substr($rawHTML,$tmp,$tmp2-$tmp);
while ($tmp = strpos(strtoupper($table),"
",$tmp);
if ($tmp === false) { return false; }
$tmp++;
$tmp2 = strpos(strtoupper($row),"")+5);
$rowIdx++;
}
return $aryData;
}
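/*
Static method table_into_xml()
Generic function to return an XML dataset built from HTML table data.
rawHTML: the page source
needle: optional string to start parsing source from (default "")
needle_within: optional, default 0; passed through unchanged to table_into_array()
allowedTags: optional list of tags to NOT strip from data (default "")
Returns the XML string, or false when table_into_array() returns a falsy result.
*/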
function table_into_xml($rawHTML,$needle="",$needle_within=0,$allowedTags="") {
    // Delegate the HTML parsing; any falsy result (false or an empty array)
    // is reported to the caller as false.
    $tableRows = http::table_into_array($rawHTML, $needle, $needle_within, $allowedTags);
    if (!$tableRows) {
        return false;
    }

    // NOTE(review): the literals below contain only whitespace and the two
    // counters are incremented but never read. This looks like markup was
    // lost from the strings at some point -- confirm against the upstream
    // copy of this class before relying on the output.
    $chunks = array("\n", "\n");
    $rowNumber = 0;
    foreach ($tableRows as $cells) {
        $chunks[] = "\t\n";
        $cellNumber = 0;
        foreach ($cells as $cell) {
            // Escape HTML special characters first, then convert to UTF-8,
            // then trim surrounding whitespace.
            $encoded = trim(utf8_encode(htmlspecialchars($cell)));
            $chunks[] = "\t\t" . $encoded . "\n";
            $cellNumber++;
        }
        $chunks[] = "\t
\n";
        $rowNumber++;
    }
    $chunks[] = "
";

    // Pieces are joined in the order they were appended.
    return implode("", $chunks);
}
}
// end class, start functions
function Site1() {
    // Fetch one page through the http class, pull table data out of it and
    // echo the first cell of every row with its links rewritten.
    $client = new http();
    $client->dir = "FOLDER HERE"; // enter server path to the cache directory on YOUR server
    $pageUrl = "FULL URL HERE"; // enter the full http path to the URL of the page you're scraping

    // 86400 is presumably a cache lifetime in seconds (one day) -- confirm against http::fetch().
    if (!$client->fetch($pageUrl, 86400)) {
        echo "There is a problem with the http request!
";
        echo $client->log;
        exit();
    }

    // "ITEM 1" sits in the needle position and "ITEM 2" in the allowedTags
    // position of table_into_array(); replace both placeholders for your page.
    // NOTE(review): an earlier comment described them as the beginning and end
    // tags of the scraped content -- confirm against table_into_array().
    $rows = http::table_into_array($client->body, "ITEM 1", 1, "ITEM 2");
    if (!$rows) {
        echo "Whoops! We had a problem loading this content. Please try refreshing the page.";
        return;
    }

    $siteRoot = "URL HERE"; // enter the URL - http:// and all - to home page of the site you're scraping
    // Every href=" gets a rel="external" attribute in front of it and the
    // site root prepended to its target.
    $hrefReplacement = "rel=\"external\" " . 'href="' . $siteRoot;

    foreach ($rows as $row) {
        // $row is presumably an array of cell strings; str_replace() then
        // returns an array and only its first element is printed.
        $rewritten = str_replace('href="', $hrefReplacement, $row);
        echo $rewritten[0] . "
\n";
    }
}
?>