More comprehensive list of rogue bots.

This commit is contained in:
Andrea Dell'Amico 2025-01-17 20:05:29 +01:00
parent 4b04781a9d
commit 978229b0b2
Signed by: adellam
GPG Key ID: 147ABE6CEB9E20FF
3 changed files with 41 additions and 7 deletions

View File

@@ -149,10 +149,36 @@ nginx_robots_disallow_everything: false
nginx_robots_enable_crawl_delay: false nginx_robots_enable_crawl_delay: false
nginx_robots_crawl_delay: 10 nginx_robots_crawl_delay: 10
nginx_robots_disallowed_useragent_list: nginx_robots_disallowed_useragent_list:
- SemrushBot - name: SemrushBot
- SemrushBot-SA uri: /
- Yandex - name: SemrushBot-SA
- YandexBot uri: /
- name: Yandex
uri: /
- name: YandexBot
uri: /
# - name: ClaudeBot
# uri: /
# - name: Claude-Web
# uri: /
# - name: Bytedance
# uri: /
# - name: Bytespider
# uri: /
# - name: FacebookBot
# uri: /
# - name: ChatGPT-User
# uri: /
# - name: openai
# uri: /
# - name: openai.com
# uri: /
# - name: GPTBot
# uri: /
# - name: DotBot
# uri: /
# - name: PetalBot
# uri: /
nginx_robots_disallowed_uris: false nginx_robots_disallowed_uris: false
nginx_robots_disallowed_uris_list: [] nginx_robots_disallowed_uris_list: []

View File

@@ -101,6 +101,9 @@ server {
access_log off; access_log off;
} }
location = /robots.txt { location = /robots.txt {
{% if nginx_install_robots_txt %}
root {{ nginx_webroot }};
{% endif %}
allow all; allow all;
log_not_found off; log_not_found off;
access_log off; access_log off;
@@ -248,6 +251,9 @@ server {
access_log off; access_log off;
} }
location = /robots.txt { location = /robots.txt {
{% if nginx_install_robots_txt %}
root {{ nginx_webroot }};
{% endif %}
allow all; allow all;
log_not_found off; log_not_found off;
access_log off; access_log off;

View File

@@ -3,10 +3,11 @@ User-Agent: *
Disallow: / Disallow: /
{% else %} {% else %}
{% for ua in nginx_robots_disallowed_useragent_list %} {% for ua in nginx_robots_disallowed_useragent_list %}
User-agent: {{ ua }} User-agent: {{ ua.name }}
Disallow: / Disallow: {{ ua.uri | default('/') }}
{% endfor %} {% endfor %}
{% if nginx_robots_disallowed_uris %} {% if nginx_robots_disallowed_uris %}
User-agent: * User-agent: *
{% for uri in nginx_robots_disallowed_uris_list %} {% for uri in nginx_robots_disallowed_uris_list %}
@@ -19,3 +20,4 @@ User-Agent: *
Crawl-Delay: {{ nginx_robots_crawl_delay }} Crawl-Delay: {{ nginx_robots_crawl_delay }}
{% endif %} {% endif %}
{% endif %} {% endif %}