PHP批量检查网站的sitemap是否存在
文章转载，话不多说，代码其实可以改得更好点。
<?php
/*
 * Batch sitemap checker.
 *
 * Reads one site per line from sitexml.txt, sends a header-only request for
 * "<site>/sitemap.xml" and prints, as HTML, whether the sitemap exists.
 * When the first answer is a 301 the request is retried once on the
 * "www." variant of the same host.
 */

/**
 * Sends a header-only request to $url and returns the raw response headers.
 *
 * @param string $url URL to probe.
 * @return string|false Raw response headers, or false when the request failed.
 */
function curl($url)
{
    // Browser identity sent with the probe.
    $browsers = array(
        "user_agent" => "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 (.NET CLR 3.5.30729)",
        "language" => "en-us,en;q=0.5",
    );

    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, array(
        "User-Agent: {$browsers['user_agent']}",
        "Accept-Language: {$browsers['language']}",
    ));
    // The page body is not needed ...
    curl_setopt($ch, CURLOPT_NOBODY, 1);
    // ... only the response headers ...
    curl_setopt($ch, CURLOPT_HEADER, 1);
    // ... returned as a string instead of being printed.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    $output = curl_exec($ch);
    // Release the handle BEFORE returning; the original placed this call
    // after the return statement, so it never ran.
    curl_close($ch);

    return $output;
}

/**
 * Extracts the numeric status code from raw response headers.
 *
 * Accepts any HTTP version in the status line (e.g. "HTTP/1.0", "HTTP/1.1",
 * "HTTP/2"), and uses the first status line found.
 *
 * @param string|false $headers Raw headers as returned by curl().
 * @return int|null Status code, or null when no status line is present.
 */
function sitemap_http_status($headers)
{
    if (!is_string($headers)
        || !preg_match('/HTTP\/\d+(?:\.\d+)?\s+(\d{3})/', $headers, $match)) {
        return null;
    }

    return (int) $match[1];
}

/**
 * Returns $url with "www." prepended to its host part.
 *
 * A leading "http://" or "https://" is kept in front of the inserted prefix;
 * a URL whose host already starts with "www." is returned unchanged.
 *
 * @param string $url URL with or without a scheme.
 * @return string
 */
function sitemap_with_www($url)
{
    $scheme = '';
    if (preg_match('/^https?:\/\//i', $url, $match)) {
        $scheme = $match[0];
        $url = substr($url, strlen($scheme));
    }

    if (stripos($url, 'www.') === 0) {
        return $scheme . $url;
    }

    return $scheme . 'www.' . $url;
}

$webfile = "sitexml.txt";
$opensite = is_readable($webfile) ? fopen($webfile, 'r') : false;

if ($opensite === false) {
    // "Cannot open the site list file" - without this guard a missing file
    // made the read loop spin forever (PHP 7) or throw (PHP 8).
    echo '<FONT color=#ff0000>' . "无法打开站点列表文件: " . $webfile . '</font>' . "<br>";
} else {
    // Status codes that are reported as "no sitemap".
    $errorStatuses = array(400, 403, 404, 500, 501, 502, 503, 504, 505);

    while (($line = fgets($opensite)) !== false) {
        // trim() also removes the "\r" of CRLF files and stray spaces.
        $onesite = trim($line);
        if ($onesite === '') {
            continue;
        }

        $url = $onesite . "/sitemap.xml";
        $safeUrl = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
        echo "[URL]: $safeUrl<br>";

        // One request per site; the result is reused for every check below.
        $headers = curl($url);
        if ($headers === false || $headers === '') {
            echo '<FONT color=#ff0000>' . "网站不能打开" . '</font>' . "<br>";
        } else {
            // Response headers come from the remote server: escape them.
            echo htmlspecialchars($headers, ENT_QUOTES, 'UTF-8');
        }

        $status = sitemap_http_status($headers);

        if ($status === 200) {
            echo $safeUrl . "存在sitemap" . "<br>";
        } elseif ($status === 301) {
            // Redirected: try the "www." variant of the same host once.
            $url = sitemap_with_www($url);
            $safeUrl = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
            if (sitemap_http_status(curl($url)) === 200) {
                echo $safeUrl . "存在sitemap" . "<br>";
            } else {
                echo '<FONT color=#ff0000>' . $safeUrl . "没有sitemap" . '</font>';
            }
        } elseif (in_array($status, $errorStatuses, true)) {
            // The original compared against (404|400|...|505), a bitwise OR
            // that evaluates to 511, so this branch could never match.
            echo '<FONT color=#ff0000>' . $safeUrl . "没有sitemap" . '</font>';
        }

        echo "<br><br>";
    }

    fclose($opensite);
}
?>
转载请注明来自WebShell'S Blog,本文地址:https://www.webshell.cc/2443.html