robots: throw an error 403 to bots ignoring robots.txt

This commit is contained in:
Tanguy Pruvot 2017-05-27 05:21:06 +02:00
parent a6658ad221
commit 8d95852252
2 changed files with 13 additions and 4 deletions

View file

@ -11,9 +11,18 @@ if(isset($_SERVER['HTTP_X_FORWARDED_FOR']))
//$_SERVER['PATH_INFO'] = $_SERVER['REQUEST_URI'];
if(0)
{
debuglog("{$_SERVER['REMOTE_ADDR']}, {$_SERVER['REQUEST_URI']}");
// Blacklist some search bots which ignore robots.txt (most of them, in fact).
// strpos() returns 0 when the token sits at the very start of the user agent
// (e.g. "Googlebot/2.1 ..."), and 0 is falsy, so every result has to be
// compared against false explicitly or those agents slip through.
$agent = arraySafeVal($_SERVER,'HTTP_USER_AGENT','');
$isbot = false;
foreach (array('MJ12bot', 'DotBot', 'robot', 'AhrefsBot', 'YandexBot', 'Googlebot') as $botname) {
    if (strpos($agent, $botname) !== false) {
        $isbot = true;
        break;
    }
}
if ($isbot) {
    // NOTE(review): what arraySafeVal returns for a missing key is not visible
    // here; the cast only guarantees strpos() receives a string.
    $url = (string) arraySafeVal($_SERVER,'REQUEST_URI');
    // Requests whose URI contains "explorer" get an explicit 403 ...
    if (strpos($url, "explorer") !== false)
        throw new CHttpException(403,"You are not wanted on this server. see robots.txt");
    // ... any other request from a blacklisted bot is terminated with no output.
    die();
}
try

View file

@ -1,2 +1,2 @@
User-agent: *
Disallow:
Disallow: /