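More comprehensive list of rogue bots for robots.txt; disallowed user-agent entries are now name/uri mappings instead of plain strings (additional bots are included but commented out).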
This commit is contained in:
parent
4b04781a9d
commit
978229b0b2
|
@ -149,10 +149,36 @@ nginx_robots_disallow_everything: false
|
||||||
nginx_robots_enable_crawl_delay: false
|
nginx_robots_enable_crawl_delay: false
|
||||||
nginx_robots_crawl_delay: 10
|
nginx_robots_crawl_delay: 10
|
||||||
nginx_robots_disallowed_useragent_list:
|
nginx_robots_disallowed_useragent_list:
|
||||||
- SemrushBot
|
- name: SemrushBot
|
||||||
- SemrushBot-SA
|
uri: /
|
||||||
- Yandex
|
- name: SemrushBot-SA
|
||||||
- YandexBot
|
uri: /
|
||||||
|
- name: Yandex
|
||||||
|
uri: /
|
||||||
|
- name: YandexBot
|
||||||
|
uri: /
|
||||||
|
# - name: ClaudeBot
|
||||||
|
# uri: /
|
||||||
|
# - name: Claude-Web
|
||||||
|
# uri: /
|
||||||
|
# - name: Bytedance
|
||||||
|
# uri: /
|
||||||
|
# - name: Bytespider
|
||||||
|
# uri: /
|
||||||
|
# - name: FacebookBot
|
||||||
|
# uri: /
|
||||||
|
# - name: ChatGPT-User
|
||||||
|
# uri: /
|
||||||
|
# - name: openai
|
||||||
|
# uri: /
|
||||||
|
# - name: openai.com
|
||||||
|
# uri: /
|
||||||
|
# - name: GPTBot
|
||||||
|
# uri: /
|
||||||
|
# - name: DotBot
|
||||||
|
# uri: /
|
||||||
|
# - name: PetalBot
|
||||||
|
# uri: /
|
||||||
|
|
||||||
nginx_robots_disallowed_uris: false
|
nginx_robots_disallowed_uris: false
|
||||||
nginx_robots_disallowed_uris_list: []
|
nginx_robots_disallowed_uris_list: []
|
||||||
|
|
|
@ -101,6 +101,9 @@ server {
|
||||||
access_log off;
|
access_log off;
|
||||||
}
|
}
|
||||||
location = /robots.txt {
|
location = /robots.txt {
|
||||||
|
{% if nginx_install_robots_txt %}
|
||||||
|
root {{ nginx_webroot }};
|
||||||
|
{% endif %}
|
||||||
allow all;
|
allow all;
|
||||||
log_not_found off;
|
log_not_found off;
|
||||||
access_log off;
|
access_log off;
|
||||||
|
@ -248,6 +251,9 @@ server {
|
||||||
access_log off;
|
access_log off;
|
||||||
}
|
}
|
||||||
location = /robots.txt {
|
location = /robots.txt {
|
||||||
|
{% if nginx_install_robots_txt %}
|
||||||
|
root {{ nginx_webroot }};
|
||||||
|
{% endif %}
|
||||||
allow all;
|
allow all;
|
||||||
log_not_found off;
|
log_not_found off;
|
||||||
access_log off;
|
access_log off;
|
||||||
|
|
|
@ -3,10 +3,11 @@ User-Agent: *
|
||||||
Disallow: /
|
Disallow: /
|
||||||
{% else %}
|
{% else %}
|
||||||
{% for ua in nginx_robots_disallowed_useragent_list %}
|
{% for ua in nginx_robots_disallowed_useragent_list %}
|
||||||
User-agent: {{ ua }}
|
User-agent: {{ ua.name }}
|
||||||
Disallow: /
|
Disallow: {{ ua.uri | default('/') }}
|
||||||
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
{% if nginx_robots_disallowed_uris %}
|
{% if nginx_robots_disallowed_uris %}
|
||||||
User-agent: *
|
User-agent: *
|
||||||
{% for uri in nginx_robots_disallowed_uris_list %}
|
{% for uri in nginx_robots_disallowed_uris_list %}
|
||||||
|
@ -19,3 +20,4 @@ User-Agent: *
|
||||||
Crawl-Delay: {{ nginx_robots_crawl_delay }}
|
Crawl-Delay: {{ nginx_robots_crawl_delay }}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue