From 71e3a948c6eb2e91fd4e6206f7691ba8fcbff400 Mon Sep 17 00:00:00 2001
From: Andrea Dell'Amico
Date: Wed, 7 Jul 2021 14:03:33 +0200
Subject: [PATCH] Optionally manage a robots.txt file.

Add tasks/robots-txt.yml and templates/robots.txt.j2, imported from
tasks/main.yml when nginx_use_common_virthost is true.

When nginx_install_robots_txt is true, robots.txt is templated into
nginx_web_root and into every virtualhost root that differs from it.
When it is false, those files are removed instead.

The template content is driven by the new defaults:
  - nginx_robots_disallow_everything: emit a single "Disallow: /" rule
    for every user agent and nothing else.
  - nginx_robots_disallowed_useragent_list: user agents that get a
    "Disallow: /" group of their own.
  - nginx_robots_disallowed_uris / nginx_robots_disallowed_uris_list:
    optional list of URIs disallowed for every user agent.
  - nginx_robots_enable_crawl_delay / nginx_robots_crawl_delay:
    optional Crawl-Delay directive for every user agent.

The virtualhost tasks move from with_items to loop. loop only accepts
a list, so the fallback for an undefined nginx_virthosts is an empty
list and not the omit placeholder. File modes are quoted so that they
are always handled as octal strings.

---
 defaults/main.yml            | 13 +++++++++
 tasks/main.yml               |  2 ++
 tasks/nginx-virtualhosts.yml |  8 +++---
 tasks/robots-txt.yml         | 53 ++++++++++++++++++++++++++++++++++++
 templates/robots.txt.j2      | 21 ++++++++++++++
 5 files changed, 93 insertions(+), 4 deletions(-)
 create mode 100644 tasks/robots-txt.yml
 create mode 100644 templates/robots.txt.j2

diff --git a/defaults/main.yml b/defaults/main.yml
index 6fd7821..407b7fc 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -111,6 +111,19 @@ nginx_block_dotfiles: True
 nginx_logrotate_maxfilesize: "1G"
 nginx_logrotate_retention: "52"
 
+nginx_install_robots_txt: False
+nginx_robots_disallow_everything: False
+nginx_robots_enable_crawl_delay: False
+nginx_robots_crawl_delay: 10
+nginx_robots_disallowed_useragent_list:
+  - SemrushBot
+  - SemrushBot-SA
+  - Yandex
+  - YandexBot
+
+nginx_robots_disallowed_uris: False
+nginx_robots_disallowed_uris_list: []
+
 nginx_use_common_virthost: False
 #
 # Virtualhost example
diff --git a/tasks/main.yml b/tasks/main.yml
index 6f1e42d..9bc55cc 100644
--- a/tasks/main.yml
+++ b/tasks/main.yml
@@ -6,6 +6,8 @@
 - import_tasks: nginx-config.yml
 - import_tasks: nginx-virtualhosts.yml
   when: nginx_use_common_virthost | bool
+- import_tasks: robots-txt.yml
+  when: nginx_use_common_virthost | bool
 - import_tasks: nginx-logrotate.yml
 - import_tasks: nginx-letsencrypt.yml
   when: letsencrypt_acme_install is defined and letsencrypt_acme_install
diff --git a/tasks/nginx-virtualhosts.yml b/tasks/nginx-virtualhosts.yml
index 0380a63..b796d0c 100644
--- a/tasks/nginx-virtualhosts.yml
+++ b/tasks/nginx-virtualhosts.yml
@@ -10,12 +10,12 @@
   block:
     - name: Install the nginx virtualhost files
       template: src=nginx-virthost.j2 dest=/etc/nginx/sites-available/{{ item.virthost_name }} owner=root group=root mode=0444
-      with_items: '{{ nginx_virthosts | default(omit) }}'
+      loop: '{{ nginx_virthosts | default([]) }}'
       notify: Reload nginx
 
     - name: Enable the nginx virtualhosts
       file: src=/etc/nginx/sites-available/{{ item.virthost_name }} dest=/etc/nginx/sites-enabled/{{ item.virthost_name }} state=link
-      with_items: '{{ nginx_virthosts | default(omit) }}'
+      loop: '{{ nginx_virthosts | default([]) }}'
       notify: Reload nginx
 
   when: ansible_distribution_file_variety == "Debian"
@@ -25,7 +25,7 @@
   block:
     - name: Install the nginx virtualhost files
       template: src=nginx-virthost.j2 dest=/etc/nginx/conf.d/{{ item.virthost_name }}.conf owner=root group=root mode=0444
-      with_items: '{{ nginx_virthosts | default(omit) }}'
+      loop: '{{ nginx_virthosts | default([]) }}'
       notify: Reload nginx
 
     - name: nginx must be able to network connect when used as a proxy
@@ -33,7 +33,7 @@
         name: httpd_can_network_connect
         state: yes
         persistent: yes
-      with_items: '{{ nginx_virthosts | default(omit) }}'
+      loop: '{{ nginx_virthosts | default([]) }}'
       when: item.proxy_standard_setup is defined and item.proxy_standard_setup
 
   when: ansible_distribution_file_variety == "RedHat"
diff --git a/tasks/robots-txt.yml b/tasks/robots-txt.yml
new file mode 100644
index 0000000..130c3cf
--- /dev/null
+++ b/tasks/robots-txt.yml
@@ -0,0 +1,53 @@
+---
+- name: Install a global robots.txt
+  block:
+    - name: Install a robots.txt into the global webroot
+      template:
+        src: robots.txt.j2
+        dest: '{{ nginx_web_root }}/robots.txt'
+        owner: root
+        group: root
+        mode: '0444'
+
+  when: nginx_install_robots_txt | bool
+  tags: [ 'nginx', 'robots_txt' ]
+
+- name: Install a virtualhost specific robots.txt
+  block:
+    - name: Install a robots.txt into the virtualhost webroot
+      template:
+        src: robots.txt.j2
+        dest: '{{ item.root }}/robots.txt'
+        owner: root
+        group: root
+        mode: '0444'
+      loop: '{{ nginx_virthosts }}'
+      when: nginx_web_root != item.root
+
+  when:
+    - nginx_install_robots_txt | bool
+    - nginx_use_common_virthost | bool
+  tags: [ 'nginx', 'robots_txt' ]
+
+- name: Remove the global robots.txt
+  block:
+    - name: Remove the global robots.txt
+      file:
+        dest: '{{ nginx_web_root }}/robots.txt'
+        state: absent
+
+  when: not (nginx_install_robots_txt | bool)
+  tags: [ 'nginx', 'robots_txt' ]
+
+- name: Remove the virtualhost specific robots.txt
+  block:
+    - name: Remove the robots.txt from the virtualhost webroot
+      file:
+        dest: '{{ item.root }}/robots.txt'
+        state: absent
+      loop: '{{ nginx_virthosts }}'
+
+  when:
+    - not (nginx_install_robots_txt | bool)
+    - nginx_use_common_virthost | bool
+  tags: [ 'nginx', 'robots_txt' ]
diff --git a/templates/robots.txt.j2 b/templates/robots.txt.j2
new file mode 100644
index 0000000..409648f
--- /dev/null
+++ b/templates/robots.txt.j2
@@ -0,0 +1,21 @@
+{% if nginx_robots_disallow_everything %}
+User-Agent: *
+Disallow: /
+{% else %}
+{% for ua in nginx_robots_disallowed_useragent_list %}
+User-agent: {{ ua }}
+Disallow: /
+
+{% endfor %}
+{% if nginx_robots_disallowed_uris %}
+User-agent: *
+{% for uri in nginx_robots_disallowed_uris_list %}
+Disallow: {{ uri }}
+{% endfor %}
+{% endif %}
+
+{% if nginx_robots_enable_crawl_delay %}
+User-Agent: *
+Crawl-Delay: {{ nginx_robots_crawl_delay }}
+{% endif %}
+{% endif %}
\ No newline at end of file