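Add a more comprehensive list of rogue bots; each disallowed user agent is now a name/uri entry, and robots.txt is served from the nginx webroot when it is installed.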

This commit is contained in:
Andrea Dell'Amico 2025-01-17 20:05:29 +01:00
parent 4b04781a9d
commit 978229b0b2
Signed by: adellam
GPG Key ID: 147ABE6CEB9E20FF
3 changed files with 41 additions and 7 deletions

View File

@ -149,10 +149,36 @@ nginx_robots_disallow_everything: false
nginx_robots_enable_crawl_delay: false
nginx_robots_crawl_delay: 10
nginx_robots_disallowed_useragent_list:
- SemrushBot
- SemrushBot-SA
- Yandex
- YandexBot
- name: SemrushBot
uri: /
- name: SemrushBot-SA
uri: /
- name: Yandex
uri: /
- name: YandexBot
uri: /
# - name: ClaudeBot
# uri: /
# - name: Claude-Web
# uri: /
# - name: Bytedance
# uri: /
# - name: Bytespider
# uri: /
# - name: FacebookBot
# uri: /
# - name: ChatGPT-User
# uri: /
# - name: openai
# uri: /
# - name: openai.com
# uri: /
# - name: GPTBot
# uri: /
# - name: DotBot
# uri: /
# - name: PetalBot
# uri: /
nginx_robots_disallowed_uris: false
nginx_robots_disallowed_uris_list: []

View File

@ -101,6 +101,9 @@ server {
access_log off;
}
location = /robots.txt {
{% if nginx_install_robots_txt %}
root {{ nginx_webroot }};
{% endif %}
allow all;
log_not_found off;
access_log off;
@ -248,6 +251,9 @@ server {
access_log off;
}
location = /robots.txt {
{% if nginx_install_robots_txt %}
root {{ nginx_webroot }};
{% endif %}
allow all;
log_not_found off;
access_log off;

View File

@ -3,10 +3,11 @@ User-Agent: *
Disallow: /
{% else %}
{% for ua in nginx_robots_disallowed_useragent_list %}
User-agent: {{ ua }}
Disallow: /
User-agent: {{ ua.name }}
Disallow: {{ ua.uri | default('/') }}
{% endfor %}
{% if nginx_robots_disallowed_uris %}
User-agent: *
{% for uri in nginx_robots_disallowed_uris_list %}
@ -18,4 +19,5 @@ Disallow: {{ uri }}
User-Agent: *
Crawl-Delay: {{ nginx_robots_crawl_delay }}
{% endif %}
{% endif %}
{% endif %}