mirror of
https://github.com/LBRYFoundation/pool.git
synced 2025-08-23 09:27:25 +00:00
robots: throw an error 403 to bots ignoring robots.txt
This commit is contained in:
parent
a6658ad221
commit
8d95852252
2 changed files with 13 additions and 4 deletions
|
@ -11,9 +11,18 @@ if(isset($_SERVER['HTTP_X_FORWARDED_FOR']))
|
|||
|
||||
//$_SERVER['PATH_INFO'] = $_SERVER['REQUEST_URI'];
|
||||
|
||||
if(0)
|
||||
{
|
||||
debuglog("{$_SERVER['REMOTE_ADDR']}, {$_SERVER['REQUEST_URI']}");
|
||||
// Blacklist some search bots which ignore robots.txt (most of them, in fact).
// A bot is recognised by a case-sensitive substring of its User-Agent header.
$agent = (string) arraySafeVal($_SERVER,'HTTP_USER_AGENT','');
$botSignatures = array('MJ12bot', 'DotBot', 'robot', 'AhrefsBot', 'YandexBot', 'Googlebot');

$isbot = false;
foreach ($botSignatures as $signature) {
	// strpos() returns 0 when the needle sits at the very start of the string
	// (e.g. "Googlebot/2.1 (+http://www.google.com/bot.html)"). 0 is falsy, so
	// the result must be compared against false explicitly.
	if (strpos($agent, $signature) !== false) {
		$isbot = true;
		break;
	}
}

if ($isbot) {
	// NOTE(review): arraySafeVal() is called without a default here; its
	// fallback value is not visible in this file, so cast to string before
	// handing the result to strpos().
	$url = (string) arraySafeVal($_SERVER,'REQUEST_URI');

	// Bots requesting an explorer page get an explicit 403.
	if (strpos($url, "explorer") !== false) {
		throw new CHttpException(403,"You are not wanted on this server. see robots.txt");
	}

	// Every other bot request is terminated here without any output. The
	// original code already behaved this way: die() was indented as if it
	// belonged to the if above, but that if had no braces.
	die();
}
|
||||
|
||||
try
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
User-agent: *
|
||||
Disallow:
|
||||
Disallow: /
|
||||
|
|
Loading…
Add table
Reference in a new issue