summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkwadronaut <kwadronaut@autistici.org>2024-02-05 12:52:32 +0000
committerkwadronaut <kwadronaut@autistici.org>2024-02-05 12:52:32 +0000
commit354794a833750a4ac2e90e20fa141e23c3cce20c (patch)
tree67c3cd1100f99d4291c9f2d58ccbb4f8b9c07888
parentc6d787af527667d24631d61f7b3050d25cce8139 (diff)
parentc36242b3fea95e7e4883157c25d31bb333edae0d (diff)
Merge branch 'feat/upgrade-float' into 'main'
#70 git subrepo pull --force float See merge request leap/container-platform/lilypad!74
-rw-r--r--float/.gitlab-ci.yml28
-rw-r--r--float/.gitrepo6
-rw-r--r--float/ci/deploy.yml2
-rw-r--r--float/docs/quickstart.it.md4
-rw-r--r--float/docs/quickstart.md6
-rw-r--r--float/docs/reference.md55
-rw-r--r--float/docs/reference.pdfbin499864 -> 513127 bytes
-rwxr-xr-xfloat/float10
-rw-r--r--float/playbooks/apt-upgrade.yml1
-rw-r--r--float/playbooks/frontend.yml4
-rw-r--r--float/playbooks/init-credentials.yml16
-rw-r--r--float/plugins/action/float_authorized_keys.py27
-rw-r--r--float/plugins/action/float_container_options.py108
-rw-r--r--float/plugins/action/tinc_host_conf.py73
-rw-r--r--float/plugins/inventory/float.py25
-rw-r--r--float/roles/float-base-backup-metadata/handlers/main.yml3
-rw-r--r--float/roles/float-base-datasets/tasks/dataset_litestream.yml5
-rw-r--r--float/roles/float-base-datasets/templates/litestream-restore-script.j25
-rw-r--r--float/roles/float-base-datasets/templates/restore-service.j23
-rw-r--r--float/roles/float-base-docker/defaults/main.yml7
-rw-r--r--float/roles/float-base-docker/files/in-container.sh13
-rw-r--r--float/roles/float-base-docker/tasks/main.yml11
-rw-r--r--float/roles/float-base-docker/tasks/podman.yml23
-rw-r--r--float/roles/float-base-docker/tasks/start.yml30
-rw-r--r--float/roles/float-base-docker/templates/cleanup.sh.j22
-rwxr-xr-xfloat/roles/float-base-docker/templates/float-pull-image.j211
-rw-r--r--float/roles/float-base-docker/templates/run.sh.j2127
-rw-r--r--float/roles/float-base-docker/templates/systemd.j216
-rw-r--r--float/roles/float-base-docker/vars/podman.yml5
-rw-r--r--float/roles/float-base-net-overlay/tasks/configure_netoverlay_tinc.yml27
-rw-r--r--float/roles/float-base-net-overlay/templates/firewall/11net-overlay-raw.j24
-rw-r--r--float/roles/float-base-net-overlay/templates/tinc/tinc-up.j22
-rw-r--r--float/roles/float-base-service-credentials/meta/main.yml2
-rw-r--r--float/roles/float-base/files/modprobe-hardening.conf33
-rw-r--r--float/roles/float-base/files/node-exporter-freeipmi.awk87
-rw-r--r--float/roles/float-base/files/node-exporter-scripts/freeipmi.sh3
-rw-r--r--float/roles/float-base/files/node-exporter-scripts/smartmon.py17
-rw-r--r--float/roles/float-base/tasks/apt.yml92
-rw-r--r--float/roles/float-base/tasks/harden.yml5
-rw-r--r--float/roles/float-base/tasks/ipmi.yml22
-rw-r--r--float/roles/float-base/tasks/main.yml10
-rw-r--r--float/roles/float-base/tasks/rollback_protection.yml3
-rw-r--r--float/roles/float-base/tasks/ssh.yml4
-rw-r--r--float/roles/float-base/tasks/systemd.yml13
-rw-r--r--float/roles/float-base/templates/firewall/10float.j24
-rw-r--r--float/roles/float-base/templates/rsyslog.conf.j26
-rw-r--r--float/roles/float-base/templates/sources.list.j26
-rw-r--r--float/roles/float-base/templates/ssh/sshd_config.j229
-rw-r--r--float/roles/float-base/templates/sysctl.conf.j28
-rw-r--r--float/roles/float-base/templates/vhostmap.prom.j22
-rw-r--r--float/roles/float-base/vars/main.yml2
-rw-r--r--float/roles/float-infra-acme/tasks/main.yml11
-rw-r--r--float/roles/float-infra-admin-dashboard/handlers/main.yml6
-rw-r--r--float/roles/float-infra-dns/defaults/main.yml4
-rw-r--r--float/roles/float-infra-dns/templates/bind/named.conf.options10
-rw-r--r--float/roles/float-infra-dns/templates/dns/infra.yml15
-rw-r--r--float/roles/float-infra-dns/templates/zonetool.yml4
-rw-r--r--float/roles/float-infra-haproxy/templates/haproxy.cfg.j22
-rw-r--r--float/roles/float-infra-log-collector/defaults/main.yml2
-rw-r--r--float/roles/float-infra-log-collector/tasks/main.yml9
-rw-r--r--float/roles/float-infra-log-collector/templates/rsyslog-collector.conf.j234
-rw-r--r--float/roles/float-infra-nginx/handlers/main.yml6
-rw-r--r--float/roles/float-infra-nginx/meta/main.yml2
-rw-r--r--float/roles/float-infra-nginx/tasks/nginx.yml8
-rw-r--r--float/roles/float-infra-nginx/templates/config/accept.map4
-rw-r--r--float/roles/float-infra-nginx/templates/config/conf.d/gzip.conf1
-rw-r--r--float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf3
-rw-r--r--float/roles/float-infra-nginx/templates/config/nginx.conf1
-rw-r--r--float/roles/float-infra-nginx/templates/nginx.mtail.j217
-rw-r--r--float/roles/float-infra-prometheus/templates/grafana/dashboards/dns.json1997
-rw-r--r--float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json750
-rw-r--r--float/roles/float-infra-prometheus/templates/prometheus.yml.j221
-rw-r--r--float/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml8
-rw-r--r--float/roles/float-infra-prometheus/templates/rules/alerts_disk.conf.yml2
-rw-r--r--float/roles/float-infra-prometheus/templates/rules/alerts_nginx.conf.yml28
-rw-r--r--float/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml4
-rw-r--r--float/roles/float-infra-prometheus/templates/rules/rules_net.conf.yml4
-rw-r--r--float/roles/float-infra-prometheus/templates/rules/rules_nginx.conf.yml4
-rw-r--r--float/roles/float-infra-service-dashboard/handlers/main.yml6
-rw-r--r--float/roles/float-infra-service-dashboard/tasks/main.yml (renamed from float/roles/float-infra-admin-dashboard/tasks/main.yml)5
-rw-r--r--float/roles/float-infra-sso-server/defaults/main.yml4
-rw-r--r--float/roles/float-infra-sso-server/handlers/main.yml2
-rw-r--r--float/roles/float-infra-sso-server/meta/main.yml2
-rw-r--r--float/roles/float-infra-sso-server/templates/server.yml.j214
-rw-r--r--float/roles/float-util-credentials/README.md24
-rw-r--r--float/roles/float-util-credentials/tasks/main.yml33
-rw-r--r--float/roles/float-util-tor-exits-dataset/README.md2
-rw-r--r--float/roles/float-util-tor-exits-dataset/files/is-tor-exit6
-rw-r--r--float/roles/float-util-tor-exits-dataset/files/update-tor-exits15
-rw-r--r--float/roles/float-util-tor-exits-dataset/tasks/main.yml31
-rwxr-xr-xfloat/scripts/floatup.py43
-rw-r--r--float/services.core.yml325
-rw-r--r--float/services.default.yml105
l---------[-rw-r--r--]float/services.yml.default106
l---------[-rw-r--r--]float/services.yml.no-elasticsearch319
-rwxr-xr-xfloat/test-driver45
-rw-r--r--float/test/README.md4
-rw-r--r--float/test/backup.ref/services.yml4
-rw-r--r--float/test/backup.ref/site.yml7
-rw-r--r--float/test/base.ref/services.yml2
-rw-r--r--float/test/float_integration_test/http.py10
-rw-r--r--float/test/float_integration_test/test_system.py6
-rw-r--r--float/test/full.ref/services.yml6
103 files changed, 2295 insertions, 2795 deletions
diff --git a/float/.gitlab-ci.yml b/float/.gitlab-ci.yml
index 7e93b27..3453101 100644
--- a/float/.gitlab-ci.yml
+++ b/float/.gitlab-ci.yml
@@ -96,23 +96,29 @@ stop_full_test_review:
- if: $CI_MERGE_REQUEST_ID
when: manual
-backup_test:
+#backup_test:
+# <<: *base_test
+# variables:
+# VM_IMAGE: "bullseye"
+# CREATE_ENV_VARS: "--additional-config test/backup.ref/config-backup.yml --playbook test/backup.ref/site.yml"
+# TEST_DIR: "test/backup.ref"
+
+bookworm_test:
<<: *base_test
variables:
- VM_IMAGE: "bullseye"
- CREATE_ENV_VARS: "--additional-config test/backup.ref/config-backup.yml --playbook test/backup.ref/site.yml"
- TEST_DIR: "test/backup.ref"
+ VM_IMAGE: "bookworm"
+ CREATE_ENV_VARS: "-e config.float_debian_dist=bookworm"
+ TEST_DIR: "test/full.ref"
docker_build_and_release_tests:
stage: docker_build
- image: docker:latest
- services:
- - docker:dind
+ image: quay.io/podman/stable
+ tags: [podman]
script:
- - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
- - cd test && docker build --build-arg ci_token=$CI_JOB_TOKEN --pull -t $IMAGE_TAG .
- - docker tag $IMAGE_TAG $CI_REGISTRY_IMAGE:integration-test
- - docker push $CI_REGISTRY_IMAGE:integration-test
+ - echo -n "$CI_JOB_TOKEN" | podman login -u gitlab-ci-token --password-stdin $CI_REGISTRY
+ - cd test && podman build --build-arg ci_token=$CI_JOB_TOKEN --pull -t $IMAGE_TAG .
+ - podman tag $IMAGE_TAG $CI_REGISTRY_IMAGE:integration-test
+ - podman push $CI_REGISTRY_IMAGE:integration-test
only:
changes:
- test/float_integration_test/**
diff --git a/float/.gitrepo b/float/.gitrepo
index a9d1600..88bd8cc 100644
--- a/float/.gitrepo
+++ b/float/.gitrepo
@@ -6,7 +6,7 @@
[subrepo]
remote = https://git.autistici.org/ai3/float.git
branch = master
- commit = c2c4ad89ae7ad3f1f722bf4528e1057c377e2886
- parent = d9b2f97887292b92babad1990bd760c23e952416
- cmdver = 0.4.3
+ commit = b02a3496b03886b59f9b1fd6c197d06c4a9ce66d
+ parent = c6d787af527667d24631d61f7b3050d25cce8139
+ cmdver = 0.4.6
method = merge
diff --git a/float/ci/deploy.yml b/float/ci/deploy.yml
index 04354f1..b0d3d1a 100644
--- a/float/ci/deploy.yml
+++ b/float/ci/deploy.yml
@@ -54,7 +54,7 @@ variables:
- >
$BUILD_DIR/float/float create-env
--domain=${DOMAIN}
- --services=${BUILD_DIR}/float/services.yml.no-elasticsearch
+ --services=${BUILD_DIR}/float/services.core.yml
--services=${SERVICES_FILE}
--passwords=${BUILD_DIR}/float/passwords.yml.default
--passwords=${PASSWORDS_FILE}
diff --git a/float/docs/quickstart.it.md b/float/docs/quickstart.it.md
index 72b17ac..59316ee 100644
--- a/float/docs/quickstart.it.md
+++ b/float/docs/quickstart.it.md
@@ -31,8 +31,8 @@ dipendenze possono essere installate con questo comando:
```shell
sudo apt install golang ansible vagrant
-go get -u git.autistici.org/ale/x509ca
-go get -u git.autistici.org/ale/ed25519gen
+go install git.autistici.org/ale/x509ca@latest
+go install git.autistici.org/ale/ed25519gen@latest
export PATH=$PATH:$HOME/go/bin
```
diff --git a/float/docs/quickstart.md b/float/docs/quickstart.md
index 58b0b6a..f09a644 100644
--- a/float/docs/quickstart.md
+++ b/float/docs/quickstart.md
@@ -35,8 +35,8 @@ other dependencies can be installed with the following commands:
```shell
sudo apt install golang ansible vagrant
-go get -u git.autistici.org/ale/x509ca
-go get -u git.autistici.org/ale/ed25519gen
+go install git.autistici.org/ale/x509ca@latest
+go install git.autistici.org/ale/ed25519gen@latest
export PATH=$PATH:$HOME/go/bin
```
@@ -106,7 +106,7 @@ files for Ansible and Vagrant, with default values filled in by
*create-env* automatically generates a default *admin* user, with
password *password*.
-You can read the [configuration reference](configuration.md) for
+You can read the [configuration reference](reference.md) for
details on the configuration file syntax and what the various options
mean.
diff --git a/float/docs/reference.md b/float/docs/reference.md
index d588cf7..b088a68 100644
--- a/float/docs/reference.md
+++ b/float/docs/reference.md
@@ -164,7 +164,7 @@ proxy layer in the picture, the conceptual flow is quite simple:
```
reverse proxy
|
- V
+ V
apache
|
V
@@ -331,7 +331,7 @@ Float offers two backup mechanisms for datasets:
favor correctness over availability, is also in the works. This
backup mechanism is *extensible* to understand the structure and
metadata of specific services' entities and accounts, if necessary.
-
+
* There are a number of instances, in float, of a specific category of
service, single-hosted small API services that run off a simple
SQLite database, some of which are critical to float's operation
@@ -568,7 +568,7 @@ Ansible roles to configure them.
Note that, in its default setup, float will naturally assume a
two-tier service topology, with "frontend" hosts handling traffic
routing in a stateless fashion, and "backend" hosts running the actual
-services. The default *services.yml.default* service description file
+services. The default *services.default.yml* service description file
literally expects the *frontend* and *backend* Ansible groups to be
defined in your inventory. However, these are just roles, and there is
nothing inherent in float that limits you to this kind of topology.
@@ -1575,6 +1575,8 @@ provided:
specify a regex (with a capture group) to extract back the host
name from the target; the default regex will extract the short
host name from URLs and host:port targets.
+* (optionally) a *scrape_interval*, if for some reason it should
+ differ from the default *prometheus_probe_scrape_interval*.
So, in the context of the previous example, if we wanted to probe
another float service called *myservice*, which hypothetically serves
@@ -1829,7 +1831,7 @@ pairs that define group variables.
### Groups
While you can define any host groups you want, the default service
-configuration in float (*services.yml.default*) expects you to define
+configuration in float (*services.default.yml*) expects you to define
at least two:
* *frontend*, for the public-facing reverse proxy hosts
@@ -1922,12 +1924,12 @@ Service metadata is encoded as a dictionary of *service name*:
Metadata for services that are part of the core infrastructure ships
embedded with this repository, so when writing your own `services.yml`
file, you only need to add your services to it. You should include the
-*services.yml.default* file shipped with the float source, which
+*services.default.yml* file shipped with the float source, which
defines all the built-in services:
```yaml
include:
- - "/path/to/float/services.yml.default"
+ - "/path/to/float/services.default.yml"
```
The `include` directive is special: it does not define a service, but
@@ -2032,6 +2034,9 @@ attributes:
`labels`: An optional dictionary of key/value labels to set for this
target (they will be added to all metrics scraped from it).
+`scrape_interval`: Optionally override the scrape interval for this
+target.
+
The Prometheus *job* labels for service targets will be automatically
generated by *float* to include the service name and the endpoint
port.
@@ -2506,8 +2511,8 @@ tuples used for redirecting top-level domains to specific destinations
service which is normally part of the log-collector infrastructure. As
this is a large Java daemon with significant memory requirements, it
is often useful to disable it for testing environments. Note that in
-this case one should also import *services.yml.no-elasticsearch*
-instead of the default *services.yml.default*.
+this case one should import *services.core.yml*
+instead of the default *services.default.yml*.
`es_log_keep_days` is a dictionary that specifies the retention time
for the various log types, in days. The default is `{ audit: 60,
@@ -2533,6 +2538,10 @@ instances should scrape their targets (default 10s).
`prometheus_lts_scrape_interval` sets how often the long-term
Prometheus instances should scrape the primary ones (default 1m).
+`prometheus_probe_scrape_interval` sets the default
+*scrape_interval* for all blackbox probes; if unset, it defaults
+to the value of `prometheus_scrape_interval`.
+
`prometheus_external_targets` allows adding additional targets to Prometheus
beyond those that are described by the service metadata. It is a list of entries
with *name*, *targets* attributes. Optionally, you may specify a *scheme*
@@ -2572,6 +2581,15 @@ to be notified about resolved alerts (default False).
### Third-party services
+#### ACME
+
+Float's ACME certificate generation service does not require any
+configuration, as it will automatically generate a Let's Encrypt
+account. It is possible, however, to tell it to use a specific account
+by providing it with a private key:
+
+`acme_private_key` - ACME private key, in PEM format
+
#### Private Docker registry
You can have float use a private Docker registry by providing it with
@@ -2622,7 +2640,7 @@ server requires TLS
`alertmanager_smtp_auth_username` and
`alertmanager_smtp_auth_password` - credentials for
authentication
-
+
`alertmanager_smtp_hello` - hostname to use in the HELO SMTP
header sent to the server (default *localhost*)
@@ -2694,8 +2712,8 @@ unsupported.
```shell
sudo apt install golang ansible
-go get git.autistici.org/ale/x509ca
-go get git.autistici.org/ale/ed25519gen
+go install git.autistici.org/ale/x509ca@latest
+go install git.autistici.org/ale/ed25519gen@latest
export PATH=$PATH:$HOME/go/bin
```
@@ -3293,13 +3311,12 @@ process:
* Set *float_debian_dist* to the new codename (e.g. "buster") in your
group_vars/all configuration.
* Run *float*, which will install the correct APT sources for the new
- release.
-* Run *apt dist-upgrade* manually or via Ansible. This part is not
- automated yet due to the large variety in possible scenarios.
-* Run *float* again: it will now detect that the distribution has
- changed and reconfigure packages as needed.
-
+ release and upgrade the servers.
+* Reboot the servers into the new kernels.
+If you want more control over this process (Debian upgrades have been
+uneventful for a while now, but that has not always been the case) you
+can of course run the upgrade manually.
# Example scenarios
@@ -3326,7 +3343,7 @@ available) for the service.
```yaml
include:
- - "/path/to/float/services.yml.default"
+ - "/path/to/float/services.default.yml"
ok:
scheduling_group: backend
num_instances: 1
@@ -3401,7 +3418,7 @@ The services.yml file:
```yaml
include:
- - "/path/to/float/services.yml.default"
+ - "/path/to/float/services.default.yml"
videoconf:
scheduling_group: videoconf
num_instances: all
diff --git a/float/docs/reference.pdf b/float/docs/reference.pdf
index d6dcce0..c55f860 100644
--- a/float/docs/reference.pdf
+++ b/float/docs/reference.pdf
Binary files differ
diff --git a/float/float b/float/float
index f4a6fb2..d2727f3 100755
--- a/float/float
+++ b/float/float
@@ -51,7 +51,7 @@ Vagrant.configure(2) do |config|
libvirt.memory = {{ ram }}
libvirt.random_hostname = true
libvirt.cpu_mode = 'host-passthrough'
- libvirt.volume_cache = 'unsafe'
+ libvirt.disk_driver :cache => 'unsafe'
{% if libvirt.remote_host %}
libvirt.host = "{{ libvirt.remote_host }}"
libvirt.username = "{{ libvirt.remote_user }}"
@@ -120,7 +120,7 @@ include:
- "{{ p | relpath(targetdir) }}"
{% endfor %}
{% else %}
- - "{{ srcdir | relpath(targetdir) }}/services.yml.no-elasticsearch"
+ - "{{ srcdir | relpath(targetdir) }}/services.core.yml"
{% endif %}
''',
'passwords.yml': '''---
@@ -185,7 +185,7 @@ DEFAULT_VARS = {
'nocows': 1,
'display_skipped_hosts': False,
- 'callback_whitelist': 'float_ci',
+ 'callbacks_enabled': 'float_ci',
'stdout_callback': 'float_ci',
'host_key_checking': False,
'forks': 50,
@@ -208,7 +208,7 @@ DEFAULT_VARS = {
'domain_public': [],
'testing': True,
- 'float_debian_dist': 'bullseye',
+ 'float_debian_dist': 'bookworm',
'net_overlays': [{
'name': 'vpn0',
'network': '192.168.13.0/24',
@@ -441,7 +441,7 @@ def command_run(config, playbooks,
print('Running playbook %s...' % (arg,))
- os.environ['LC_ALL'] = 'C'
+ os.environ['LC_ALL'] = 'C.UTF-8'
_fix_ansible_vault_password_file()
cmd = [os.getenv('ANSIBLE_PLAYBOOK', 'ansible-playbook'),
'-i', config]
diff --git a/float/playbooks/apt-upgrade.yml b/float/playbooks/apt-upgrade.yml
index f2285d3..dc2a13c 100644
--- a/float/playbooks/apt-upgrade.yml
+++ b/float/playbooks/apt-upgrade.yml
@@ -1,4 +1,5 @@
---
+
- hosts: all
tasks:
- copy:
diff --git a/float/playbooks/frontend.yml b/float/playbooks/frontend.yml
index 27fe8e8..85715f2 100644
--- a/float/playbooks/frontend.yml
+++ b/float/playbooks/frontend.yml
@@ -13,10 +13,10 @@
roles:
- float-infra-dns
-- hosts: admin_dashboard
+- hosts: service_dashboard
gather_facts: no
roles:
- - float-infra-admin-dashboard
+ - float-infra-service-dashboard
- hosts: acme
gather_facts: no
diff --git a/float/playbooks/init-credentials.yml b/float/playbooks/init-credentials.yml
index 555b0ba..6c02e0e 100644
--- a/float/playbooks/init-credentials.yml
+++ b/float/playbooks/init-credentials.yml
@@ -28,7 +28,6 @@
- dnssec
- ssh
- sso
- - x509
# First of all, generate secrets from the passwords.yml file.
- name: Initialize secrets
@@ -50,8 +49,17 @@
- name: Generate SSO credentials
local_action: ed25519 privkey="{{ credentials_dir }}/sso/secret.key" pubkey="{{ credentials_dir }}/sso/public.key"
- - name: Generate global DH params
- local_action: command openssl dhparam -out "{{ credentials_dir }}/x509/dhparam" "{{ dhparam_bits | default('2048') }}" creates="{{ credentials_dir }}/x509/dhparam"
+ - set_fact:
+ default_x509_ca_list:
+ - {tag: x509}
+
+ - name: Create X509 CA directory
+ local_action: file path="{{ credentials_dir }}/{{ item.tag }}" state=directory
+ loop: "{{ x509_ca_list | default(default_x509_ca_list) }}"
- name: Generate the X509 CA certificate
- local_action: x509_ca ca_subject="{{ x509_ca_subject | default('CN=Service CA') }}" ca_cert_path="{{ credentials_dir }}/x509/ca.pem" ca_key_path="{{ credentials_dir }}/x509/ca_private_key.pem"
+ local_action: x509_ca ca_subject="{{ item.subject | default('CN=Service CA') }}" ca_cert_path="{{ credentials_dir }}/{{ item.tag }}/ca.pem" ca_key_path="{{ credentials_dir }}/{{ item.tag }}/ca_private_key.pem"
+ loop: "{{ x509_ca_list | default(default_x509_ca_list) }}"
+
+ - name: Generate global DH params
+ local_action: command openssl dhparam -out "{{ credentials_dir }}/x509/dhparam" "{{ dhparam_bits | default('2048') }}" creates="{{ credentials_dir }}/x509/dhparam"
diff --git a/float/plugins/action/float_authorized_keys.py b/float/plugins/action/float_authorized_keys.py
new file mode 100644
index 0000000..f891d9c
--- /dev/null
+++ b/float/plugins/action/float_authorized_keys.py
@@ -0,0 +1,27 @@
+# Prepare the content of an SSH authorized_keys file from float 'admins'.
+
+from ansible.plugins.action import ActionBase
+
+
+class ActionModule(ActionBase):
+
+ TRANSFERS_FILES = False
+
+ def run(self, tmp=None, task_vars=None):
+ admins = self._templar.template('{{ admins }}')
+ authorized_keys = []
+
+ # For each SSH key, add a comment with the owner's username.
+ for entry in admins:
+ username = entry['name']
+ if 'ssh_keys' not in entry:
+ continue
+ for key in entry['ssh_keys']:
+ key_without_comment = ' '.join(key.split()[:2])
+ key_with_comment = f'{key_without_comment} {username}\n'
+ authorized_keys.append(key_with_comment)
+
+ result = super(ActionModule, self).run(tmp, task_vars)
+ result['ansible_facts'] = {'float_authorized_keys': ''.join(authorized_keys)}
+ result['changed'] = False
+ return result
diff --git a/float/plugins/action/float_container_options.py b/float/plugins/action/float_container_options.py
new file mode 100644
index 0000000..664af57
--- /dev/null
+++ b/float/plugins/action/float_container_options.py
@@ -0,0 +1,108 @@
+from ansible.plugins.action import ActionBase
+
+
+TMPFS_FLAGS = 'tmpfs-mode=01777'
+DEFAULT_TMPFS_SIZE = '64M'
+
+
+class ActionModule(ActionBase):
+
+ TRANSFERS_FILES = False
+
+ # Options to set the container environment.
+ def _environment_options(self, service, container):
+ service_name = service['name']
+ hostname = self._templar.template('{{ inventory_hostname }}')
+ domain = self._templar.template('{{ domain }}')
+
+ env = {
+ 'FLOAT_SERVICE': f'{service_name}.{domain}',
+ 'FLOAT_INSTANCE_NAME': f'{hostname}.{service_name}.{domain}',
+ 'FLOAT_CONTAINER_IMAGE': container['image'],
+ 'FLOAT_CONTAINER_NAME': f'{service_name}-{container["name"]}',
+ }
+ if 'env' in container:
+ env.update(container['env'])
+
+ options = []
+ for key, value in sorted(env.items()):
+ options.append(f'--env={key}={value}')
+ return options
+
+ # Options for volumes (tmpfs, bind mounts).
+ def _mount_options(self, service, container):
+ options = []
+ add_tmpfs = True
+
+ def _bind(src, dst):
+ options.append(f'--mount=type=bind,source={src},destination={dst}')
+
+ def _tmpfs(dst, flags=None):
+ opt = f'--mount=type=tmpfs,destination={dst},{TMPFS_FLAGS}'
+ if flags:
+ opt += f',{flags}'
+ options.append(opt)
+
+ if container.get('readonly', True):
+ options.append('--read-only')
+ add_tmpfs = False
+
+ for vol in container.get('volumes', []):
+ for src, dst in sorted(vol.items()):
+ if dst == '/tmp':
+ add_tmpfs = False
+ if src == 'tmpfs':
+ _tmpfs(dst, f'tmpfs-size={DEFAULT_TMPFS_SIZE}')
+ elif src.startswith('tmpfs/'):
+ sz = src[6:]
+ _tmpfs(dst, f'tmpfs-size={sz}')
+ else:
+ _bind(src, dst)
+ _tmpfs('/run', 'tmpfs-size=16M,exec=true,notmpcopyup')
+ _bind('/dev/log', '/dev/log')
+ _bind('/etc/credentials/system', '/etc/ssl/certs')
+ if add_tmpfs:
+ _tmpfs('/tmp', f'tmpfs-size={DEFAULT_TMPFS_SIZE},notmpcopyup')
+
+ for creds in service.get('service_credentials', []):
+ creds_name = creds['name']
+ ca_tag = creds.get('ca_tag', 'x509')
+ creds_path = f'/etc/credentials/{ca_tag}/{creds_name}'
+ _bind(creds_path, creds_path)
+
+ return options
+
+ # Network options (ports).
+ def _network_options(self, container):
+ options = ['--network=host']
+ ports = []
+ if 'ports' in container:
+ ports = container['ports']
+ elif 'port' in container:
+ ports = [container['port']]
+ for port in sorted(ports):
+ options.append(f'--expose={port}')
+ return options
+
+ def run(self, tmp=None, task_vars=None):
+ service = self._task.args['service']
+ container = self._task.args['container']
+
+ options = []
+
+ options.extend(self._environment_options(service, container))
+ options.extend(self._mount_options(service, container))
+ options.extend(self._network_options(container))
+
+ is_root = container.get('root')
+ if container.get('drop_capabilities', not is_root):
+ options.append('--security-opt=no-new-privileges')
+ options.append('--cap-drop=all')
+
+ if 'docker_options' in container:
+ options.extend(container['docker_options'].split())
+
+ result = super().run(tmp, task_vars)
+ result['options'] = options
+ result['changed'] = False
+ return result
diff --git a/float/plugins/action/tinc_host_conf.py b/float/plugins/action/tinc_host_conf.py
deleted file mode 100644
index 5b83883..0000000
--- a/float/plugins/action/tinc_host_conf.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# Generate a host configuration file for tinc (fetching the public key
-# from the remote host), and store the result in an Ansible fact.
-
-from ansible.plugins.action import ActionBase
-from ansible.errors import AnsibleFileNotFound
-from ansible.module_utils._text import to_text
-
-
-HOST_TEMPLATE = '''
-{% for ip in ips %}
-Address = {{ ip }}
-{% endfor %}
-Port = {{ tinc_config.port | default('655') }}
-Cipher = {{ tinc_config.cipher | default('aes-128-cbc') }}
-Digest = {{ tinc_config.digest | default('sha256') }}
-Compression = {{ tinc_config.compression | default('0') }}
-PMTU = {{ tinc_config.pmtu | default('1460') }}
-Subnet = {{ tinc_host_subnet }}
-
-{{ tinc_host_public_key }}
-'''
-
-
-class ActionModule(ActionBase):
-
- TRANSFERS_FILES = False
-
- def _cmd(self, task_vars, args, creates=None):
- args = {
- '_raw_params': ' '.join(args),
- 'creates': creates,
- }
- return self._execute_module(
- module_name='command',
- module_args=args,
- task_vars=task_vars,
- wrap_async=False)
-
- def run(self, tmp=None, task_vars=None):
- overlay = self._task.args['overlay']
- subnet = self._templar.template('{{ ip_%s }}/32' % overlay)
-
- # Find the overlay configuration by scanning the 'net_overlays'
- # configuration variable, which is a list - it would be simpler with
- # a dictionary.
- net_overlays = self._templar.template('{{ net_overlays|default([]) }}')
- overlay_config = {'name': overlay}
- for n in net_overlays:
- if n['name'] == overlay:
- overlay_config = n
- break
-
- result = super(ActionModule, self).run(tmp, task_vars)
-
- # Fetch the host public key.
- pubkey = self._cmd(task_vars, [
- '/bin/cat', '/etc/tinc/%s/rsa_key.pub' % overlay])['stdout']
- if not pubkey:
- result['failed'] = True
- result['msg'] = "could not fetch host public key"
- return result
-
- # Generate the template, adding some custom variables of our own.
- self._templar._available_variables['tinc_host_subnet'] = subnet
- self._templar._available_variables['tinc_host_public_key'] = pubkey
- self._templar._available_variables['tinc_config'] = overlay_config
- data = self._templar.do_template(HOST_TEMPLATE,
- preserve_trailing_newlines=True,
- escape_backslashes=False)
-
- result['ansible_facts'] = {'tinc_host_config': data}
- result['changed'] = False
- return result
diff --git a/float/plugins/inventory/float.py b/float/plugins/inventory/float.py
index 808b52b..46c2b25 100644
--- a/float/plugins/inventory/float.py
+++ b/float/plugins/inventory/float.py
@@ -352,6 +352,14 @@ def _build_public_endpoints_map(services):
return upstreams, endpoints
+def _build_public_endpoint_port_map(services):
+ endpoints_by_port = {}
+ for svc in services.values():
+ for pe in svc.get('public_endpoints', []):
+ endpoints_by_port[pe['port']] = pe['name']
+ return endpoints_by_port
+
+
# Build the map of upstreams for 'horizontal' (well-known etc) HTTP
# public endpoints.
#
@@ -491,7 +499,9 @@ class Assignments(object):
return str(self._fwd)
@classmethod
- def _available_hosts(cls, service, group_map):
+ def _available_hosts(cls, service, group_map, service_hosts_map):
+ if 'schedule_with' in service:
+ return service_hosts_map[service['schedule_with']]
scheduling_groups = ['all']
if 'scheduling_group' in service:
scheduling_groups = [service['scheduling_group']]
@@ -499,6 +509,8 @@ class Assignments(object):
scheduling_groups = service['scheduling_groups']
available_hosts = set()
for g in scheduling_groups:
+ if g not in group_map:
+ raise Exception(f'The scheduling_group "{g}" is not defined in inventoy')
available_hosts.update(group_map[g])
return list(available_hosts)
@@ -518,13 +530,17 @@ class Assignments(object):
# Iterations should happen over sorted items for reproducible
# results. The sort function combines the 'scheduling_order'
- # attribute (default -1) and the service name.
+ # attribute (default -1), the presence of the 'schedule_with'
+ # attribute, and the service name.
def _sort_key(service_name):
- return (services[service_name].get('scheduling_order', -1), service_name)
+ return (services[service_name].get('scheduling_order', -1),
+ 1 if 'schedule_with' in services[service_name] else 0,
+ service_name)
for service_name in sorted(services.keys(), key=_sort_key):
service = services[service_name]
- available_hosts = cls._available_hosts(service, group_map)
+ available_hosts = cls._available_hosts(service, group_map,
+ service_hosts_map)
num_instances = service.get('num_instances', 'all')
if num_instances == 'all':
service_hosts = sorted(available_hosts)
@@ -611,6 +627,7 @@ def run_scheduler(config):
# The following variables are just used for debugging purposes (dashboards).
'float_service_assignments': assignments._fwd,
'float_service_masters': assignments._masters,
+ 'float_http_endpoints_by_port': _build_public_endpoint_port_map(services),
})
# Set the HTTP frontend configuration on the 'frontend' group.
diff --git a/float/roles/float-base-backup-metadata/handlers/main.yml b/float/roles/float-base-backup-metadata/handlers/main.yml
index 34aab05..68eff7a 100644
--- a/float/roles/float-base-backup-metadata/handlers/main.yml
+++ b/float/roles/float-base-backup-metadata/handlers/main.yml
@@ -4,3 +4,6 @@
systemd:
name: tabacco-metadb.service
state: restarted
+ # Allow failure when testing backups: the unit can't start until later.
+ ignore_errors: "{{ testing | default(True) }}"
+
diff --git a/float/roles/float-base-datasets/tasks/dataset_litestream.yml b/float/roles/float-base-datasets/tasks/dataset_litestream.yml
index 7e31240..1e4a672 100644
--- a/float/roles/float-base-datasets/tasks/dataset_litestream.yml
+++ b/float/roles/float-base-datasets/tasks/dataset_litestream.yml
@@ -2,7 +2,6 @@
- set_fact:
dataset_filename: "{{ dataset.filename }}"
- dataset_replica_url: "{{ backup_litestream_url | default('') }}/{{ dataset_tag }}"
dataset_replication_unit: "replicate-{{ dataset_tag }}.service"
# Just don't backup at all if litestream is not configured.
dataset_should_backup: "{{ dataset_should_backup and (backup_litestream_config is defined) }}"
@@ -18,6 +17,10 @@
when: "dataset_should_backup and backup_litestream_config.get('type', 's3') == 's3'"
- set_fact:
+ dataset_litestream_config: "{{ dataset_litestream_config | combine(dataset.litestream_params) }}"
+ when: "dataset_should_backup and (dataset.litestream_params is defined)"
+
+- set_fact:
litestream_config:
dbs:
- path: "{{ dataset_path }}/{{ dataset_filename }}"
diff --git a/float/roles/float-base-datasets/templates/litestream-restore-script.j2 b/float/roles/float-base-datasets/templates/litestream-restore-script.j2
index 4d0d28a..f37e36d 100644
--- a/float/roles/float-base-datasets/templates/litestream-restore-script.j2
+++ b/float/roles/float-base-datasets/templates/litestream-restore-script.j2
@@ -1,8 +1,11 @@
#!/bin/sh
-{% if backup_litestream_url is defined %}
+{% if backup_litestream_config is defined %}
# Restore the dataset {{ dataset_name }} using litestream.
+set -a
+. /etc/litestream/{{ dataset_tag }}.env
+
/usr/bin/litestream restore --config=/etc/litestream/{{ dataset_tag }}.yml --if-replica-exists -v "{{ dataset_path }}/{{ dataset_filename }}"
if [ $? -gt 0 ]; then
diff --git a/float/roles/float-base-datasets/templates/restore-service.j2 b/float/roles/float-base-datasets/templates/restore-service.j2
index 07801ed..620f140 100644
--- a/float/roles/float-base-datasets/templates/restore-service.j2
+++ b/float/roles/float-base-datasets/templates/restore-service.j2
@@ -9,5 +9,8 @@ Type=oneshot
RemainAfterExit=true
ExecStart=/usr/lib/float/float-dataset-restore {{ dataset_tag }}
+Restart=on-failure
+RestartSec=10s
+
[Install]
RequiredBy={{ required_by | join(' ') }}
diff --git a/float/roles/float-base-docker/defaults/main.yml b/float/roles/float-base-docker/defaults/main.yml
index 8b9cd78..641abd4 100644
--- a/float/roles/float-base-docker/defaults/main.yml
+++ b/float/roles/float-base-docker/defaults/main.yml
@@ -19,12 +19,5 @@ docker_daemon_config:
docker_daemon_config_testing:
insecure-registries: ["192.168.10.1:5000"]
-# Where should we get the Podman packages from? Possible choices are
-# 'ai' (a version pinned in our repository, tested working on Debian
-# buster), 'debian' (use standard Debian packages, only available from
-# bullseye) or 'kubic' (use the upstream Kubic repositories, with the
-# latest release). The default is distribution-dependent.
-podman_package_source: "{{ 'debian' if float_debian_dist == 'bullseye' else 'ai' }}"
-
# Provide a CA for Podman (to inject a caching registry proxy, for example).
#podman_additional_ssl_ca: "{{ lookup('file', credentials_dir + '/podman_ca.pem') }}"
diff --git a/float/roles/float-base-docker/files/in-container.sh b/float/roles/float-base-docker/files/in-container.sh
new file mode 100644
index 0000000..998717b
--- /dev/null
+++ b/float/roles/float-base-docker/files/in-container.sh
@@ -0,0 +1,13 @@
+if [ -z "${PS1-}" ]; then
+ return
+fi
+
+_in_container_completion() {
+ local cur_word args
+ cur_word="${COMP_WORDS[COMP_CWORD]}"
+ args=$(podman ps --format '{{ .Names }}')
+
+ COMPREPLY=($(compgen -W "$args" -- "$cur_word"))
+}
+
+complete -F _in_container_completion in-container
diff --git a/float/roles/float-base-docker/tasks/main.yml b/float/roles/float-base-docker/tasks/main.yml
index 5fc73a5..998e612 100644
--- a/float/roles/float-base-docker/tasks/main.yml
+++ b/float/roles/float-base-docker/tasks/main.yml
@@ -30,10 +30,19 @@
- src: "in-container.j2"
dst: "/usr/local/bin/in-container"
+- name: Install docker-related files
+ copy:
+ src: "{{ item.src }}"
+ dest: "{{ item.dst }}"
+ mode: 0644
+ loop:
+ - src: "in-container.sh"
+ dst: "/etc/profile.d/in-container.sh"
+
- name: Install docker cleanup cron job
copy:
dest: /etc/cron.d/docker-cleanup
- content: "33 */3 * * * root runcron --quiet /usr/local/bin/docker-cleanup\n"
+ content: "33 3 * * * root runcron --quiet /usr/local/bin/docker-cleanup\n"
mode: 0644
- import_tasks: start.yml
diff --git a/float/roles/float-base-docker/tasks/podman.yml b/float/roles/float-base-docker/tasks/podman.yml
index d925888..3168e9f 100644
--- a/float/roles/float-base-docker/tasks/podman.yml
+++ b/float/roles/float-base-docker/tasks/podman.yml
@@ -1,13 +1,34 @@
---
+# Pick a package source for Podman - defaults to using the stock
+# Debian package since bullseye. Possible choices are 'ai' (a version
+# pinned in our repository, tested working on Debian buster), 'debian'
+# (use standard Debian packages, only available from bullseye) or
+# 'kubic' (use the upstream Kubic repositories, with the latest
+# release).
+- set_fact:
+ podman_default_package_source: "{{ podman_default_package_source_by_distro[float_debian_dist] | default('debian') }}"
+- set_fact:
+ podman_package_source: "{{ podman_default_package_source }}"
+ when: "podman_package_source is not defined"
+
- include_tasks: "podman_{{ podman_package_source }}.yml"
+# Try to detect which podman version we're on based on source and
+# distribution.
+- set_fact:
+ podman_version: "{{ 3 if float_debian_dist in ('buster', 'bullseye') else 4 }}"
+ when: "podman_package_source == 'debian'"
+- set_fact:
+ podman_version: 4
+ when: "podman_package_source != 'debian'"
+
# If we've changed sources.list for podman, it is important
# that we run apt upgrade now.
- name: Apt upgrade
apt:
upgrade: 'yes'
- when: "podman_sources_list.changed"
+ when: "podman_sources_list is defined and podman_sources_list.changed"
- name: Symlink podman to docker
file:
diff --git a/float/roles/float-base-docker/tasks/start.yml b/float/roles/float-base-docker/tasks/start.yml
index d788e19..cc8532c 100644
--- a/float/roles/float-base-docker/tasks/start.yml
+++ b/float/roles/float-base-docker/tasks/start.yml
@@ -4,21 +4,31 @@
file:
path: /usr/lib/float/docker
state: directory
+ owner: root
+ group: root
+ mode: "0750"
+
+- name: Configure container runner
+ float_container_options:
+ service: "{{ services[item.service] }}"
+ container: "{{ item.container }}"
+ loop: "{{ float_enabled_containers }}"
+ register: float_container_options
- name: Create run scripts
template:
src: run.sh.j2
- dest: "/usr/lib/float/docker/run-{{ item.service }}-{{ item.container.name }}.sh"
- mode: 0755
+ dest: "/usr/lib/float/docker/run-{{ item.0.tag }}.sh"
+ mode: "0750"
+ owner: root
+ group: root
+ vars:
+ service_user: "{{ services[item.0.service].user }}"
+ container: "{{ item.0.container }}"
+ container_options: "{{ item.1.options }}"
+ tag: "{{ item.0.tag }}"
register: docker_systemd_run_script
- loop: "{{ float_enabled_containers }}"
-
-# TODO temporary only, to be removed once all symlinks are gone
-- name: Cleanup Alias symlink
- file:
- dest: "/etc/systemd/system/{{ item.tag }}"
- state: absent
- loop: "{{ float_enabled_containers }}"
+ loop: "{{ float_enabled_containers | zip(float_container_options.results) }}"
- name: Set up the systemd units
template:
diff --git a/float/roles/float-base-docker/templates/cleanup.sh.j2 b/float/roles/float-base-docker/templates/cleanup.sh.j2
index f60504c..81e4e9a 100644
--- a/float/roles/float-base-docker/templates/cleanup.sh.j2
+++ b/float/roles/float-base-docker/templates/cleanup.sh.j2
@@ -3,6 +3,6 @@
# Cleanup unused and unreferenced Docker images and containers.
#
-{{ container_runtime }} system prune --all --volumes --force
+{{ container_runtime }} system prune --all --volumes --force --filter until=24h
exit 0
diff --git a/float/roles/float-base-docker/templates/float-pull-image.j2 b/float/roles/float-base-docker/templates/float-pull-image.j2
index ebe86dd..606adc4 100755
--- a/float/roles/float-base-docker/templates/float-pull-image.j2
+++ b/float/roles/float-base-docker/templates/float-pull-image.j2
@@ -10,6 +10,8 @@ binary={{ container_runtime }}
auth_file={{ docker_auth_file }}
+manifest_mime_types="application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json"
+
# Automatically pick up proxy settings if present.
[ -e /etc/containers/proxy-settings.sh ] && . /etc/containers/proxy-settings.sh
@@ -19,11 +21,10 @@ get_main_auth_token() {
get_auth_token() {
local url="$1"
- local auth_hdr="$(curl -s -I -H "Accept: application/vnd.docker.distribution.manifest.v2+json" "$url" \
+ local auth_hdr="$(curl -s -I -H "Accept: ${manifest_mime_types}" "$url" \
| awk 'BEGIN{IGNORECASE=1} /^www-authenticate:/ {print $3}')"
if [ -z "$auth_hdr" ]; then
- echo "Could not obtain authentication token from $url" >&2
- exit 1
+ return
fi
local scope=$(printf "%s" "${auth_hdr}" | sed -e 's/^.*scope="\([^"]*\)".*$/\1/')
local service=$(printf "%s" "${auth_hdr}" | sed -e 's/^.*service="\([^"]*\)".*$/\1/')
@@ -36,8 +37,8 @@ get_auth_token() {
get_remote_image_version() {
local url="https://${registry_hostname}/v2/${image_path}/manifests/${image_tag}"
local token="$(get_auth_token "$url")"
- curl -sf -H "Accept: application/vnd.docker.distribution.manifest.v2+json" \
- -H "Authorization: Bearer ${token}" \
+ curl -sf -H "Accept: ${manifest_mime_types}" \
+ ${token:+-H "Authorization: Bearer ${token}"} \
"$url" \
| jq -r .config.digest
}
diff --git a/float/roles/float-base-docker/templates/run.sh.j2 b/float/roles/float-base-docker/templates/run.sh.j2
index 4e08780..9b264d5 100644
--- a/float/roles/float-base-docker/templates/run.sh.j2
+++ b/float/roles/float-base-docker/templates/run.sh.j2
@@ -1,132 +1,43 @@
-#!/bin/sh
-# Start the {{ item.service }}-{{ item.container.name }} container.
+#!/bin/bash
+# Start the {{ tag }} container.
-{# The purpose of this file is to generate a nice, readable shell script, that
- can be inspected on the resulting system. Due to the issues with whitespace
- handling in Jinja, we programatically build an options list, and dump it
- at once at the end - the resulting template looks a lot like code.
-#}
+opts=( {{ container_options | map('quote') | join(' ') }} )
-{# Define a global namespace, and a macro to add options to it #}
-{%- set g = namespace(
- options=[],
- has_custom_tmp_volume=False)
--%}
-{%- macro opt(name, value=None) -%}
-{% if value is not none -%}
- {{ g.options.append('--%s=%s' % (name, value)) }}
-{% else -%}
- {{ g.options.append('--%s' % name) }}
-{% endif -%}
-{%- endmacro -%}
-{%- if item.container.docker_options is defined -%}
- {{ g.options.append(item.container.docker_options) }}
-{%- endif -%}
-
-{{ opt('network', 'host') }}
-
-{# Environment variables #}
-{% for k, v in item.container.get('env', {}) | dictsort %}
- {{ opt('env', '%s=%s' % (k, v | quote)) }}
-{% endfor %}
-
-{# Port configuration #}
-{% if item.container.get('ports', []) %}
- {% for port in item.container.get('ports',[]) %}
- {{ opt('expose', port) }}
- {% endfor %}
-{% elif item.container.get('port') %}
- {{ opt('expose', item.container.port) }}
-{% endif %}
-
-{# Root read-only? #}
-{% if item.container.get('readonly', True) %}
- {{ opt('read-only') }}
- {# podman will mount /run /tmp and /var/tmp with writable tmpfs when
- the --read-only option is specified (see --read-only-tmpfs), so
- do not add our own /tmp mount #}
- {% set g.has_custom_tmp_volume = True %}
-{% endif %}
-
-{# Define mountpoints / volumes. We treat /tmp specially as the container
- configuration might override it. To detect if one of the mounts is a
- /tmp mount, and propagate the result outside of the loop, we have to
- use a special "jinja namespace" variable - see the discussion on
- assignment scope at https://jinja.palletsprojects.com/en/2.11.x/templates/#assignments
-#}
-{% for mount in item.container.get('volumes', []) %}
- {% for k, v in mount.items() %}
- {% if v == '/tmp' %}
- {% set g.has_custom_tmp_volume = True %}
- {% endif %}
- {% if k == 'tmpfs' -%}
- {{ opt('mount', 'type=tmpfs,destination=%s,tmpfs-mode=01777,tmpfs-size=64M' % v) }}
- {% else -%}
- {{ opt('mount', 'type=bind,source=%s,destination=%s' % (k, v)) }}
- {% endif %}
- {% endfor %}
-{% endfor %}
-
-{# System-level volumes that all containers have.
- Include by default tmpfs mounts for standard Debian locations (/tmp,
- /run), and a bind mount for the syslog socket in /dev/log. #}
-{{ opt('mount', 'type=tmpfs,destination=/run,tmpfs-mode=01777,tmpfs-size=16M,noexec=false,notmpcopyup') }}
-{% if not g.has_custom_tmp_volume %}
- {{ opt('mount', 'type=tmpfs,destination=/tmp,tmpfs-mode=01777,tmpfs-size=64M,notmpcopyup') }}
-{% endif %}
-{{ opt('mount', 'type=bind,source=/dev/log,destination=/dev/log') }}
-{{ opt('mount', 'type=bind,source=/etc/credentials/system,destination=/etc/ssl/certs') }}
-
-{# Mount the service credentials inside the container. #}
-{% for creds in services[item.service].get('service_credentials', []) %}
- {{ opt('mount', 'type=bind,source=/etc/credentials/x509/' + creds.name + ',destination=/etc/credentials/x509/' + creds.name) }}
-{% endfor %}
-
-{# Security options (unless root=True) #}
-{% if item.container.get('drop_capabilities', not item.container.get('root')) %}
- {{ opt('security-opt', 'no-new-privileges') }}
- {{ opt('cap-drop', 'all') }}
-{% endif %}
-
-{# The following are options that are set at runtime, we can't use template machinery #}
-opts=
# Optionally mount the OpenCensus tracing config in the container.
if [ -d /etc/tracing ]; then
- opts="$opts --mount=type=bind,source=/etc/tracing,destination=/etc/tracing"
+ opts+=("--mount=type=bind,source=/etc/tracing,destination=/etc/tracing")
fi
-{% if not item.container.get('root') %}
+{% if not container.get('root') %}
# Run as unprivileged user.
-container_uid=$(id -u {{ services[item.service].user }})
-container_gid=$(id -g {{ services[item.service].user }})
-opts="$opts --user=$container_uid:$container_gid"
+container_uid=$(id -u {{ service_user }})
+container_gid=$(id -g {{ service_user }})
+opts+=("--user=$container_uid:$container_gid")
# Add additional groups that the user is a member of.
-for gid in $(id -G {{ services[item.service].user }}); do
+for gid in $(id -G {{ service_user }}); do
if [ $gid -ne $container_gid ]; then
- opts="$opts --group-add=$gid"
+ opts+=("--group-add=$gid")
fi
done
{% endif %}
-
-# TODO: move to --log-driver=passthrough once it is supported
-# by the Podman version in Debian stable, and then add the -d
-# option to get rid of the useless 'podman' process.
{% if container_runtime == 'podman' %}
exec /usr/bin/podman run \
+{% if float_debian_dist in ('buster', 'bullseye') %}
+ --log-driver=none \
+{% else %}
+ -d --log-driver=journald \
+{% endif %}
--cgroups=disabled \
--replace \
--sdnotify=conmon \
{% elif container_runtime == 'docker' %}
exec /usr/bin/systemd-docker --env run \
+ --log-driver=none \
{% endif %}
- --rm --name {{ item.service }}-{{ item.container.name }} \
+ --rm --name {{ tag }} \
--pull=never \
- --log-driver=none \
--no-healthcheck \
- $opts \
-{% for opt in g.options %}
- {{ opt }} \
-{% endfor %}
+ "${opts[@]}" \
"$@" \
- {{ item.container.image }} {{ item.container.get('args', '') }}
+ {{ container.image }} {{ container.get('args', '') }}
diff --git a/float/roles/float-base-docker/templates/systemd.j2 b/float/roles/float-base-docker/templates/systemd.j2
index 1d45fc1..53d9aec 100644
--- a/float/roles/float-base-docker/templates/systemd.j2
+++ b/float/roles/float-base-docker/templates/systemd.j2
@@ -7,15 +7,25 @@ Requires=docker.service
[Service]
ExecStartPre=-rm -f %t/%N.cid
+{# Since Podman 4, we can switch to a 'forking' model without the extra podman process:
+ the run script starts the container detached (-d) with --log-driver=journald. #}
+{% if container_runtime == 'podman' and podman_version != '3' %}
+Type=forking
+ExecStart=/usr/lib/float/docker/run-{{ item.service }}-{{ item.container.name }}.sh --cidfile=%t/%N.cid --conmon-pidfile=%t/%N.pid
+ExecStopPost=/usr/bin/{{ container_runtime }} rm -f -i --cidfile=%t/%N.cid
+PIDFile=%t/%N.pid
+{% else %}
+Type=notify
+NotifyAccess=all
ExecStart=/usr/lib/float/docker/run-{{ item.service }}-{{ item.container.name }}.sh --cidfile=%t/%N.cid
ExecStopPost=-/usr/bin/{{ container_runtime }} rm -f -i --cidfile=%t/%N.cid
+{% endif %}
ExecStopPost=-rm -f %t/%N.cid
TimeoutStopSec=60
+TimeoutStartSec=240
KillMode=mixed
Restart=always
RestartSec=3s
-Type=notify
-NotifyAccess=all
SyslogIdentifier={{ item.service }}-{{ item.container.name }}
{% if item.container.resources is defined %}
@@ -34,7 +44,7 @@ LimitNOFILE=65535
IPAddressDeny=any
IPAddressAllow=localhost
{# This is a terrible way to determine which private networks the host is on.
- It would be a good candidate for pre-processing in the float plugin. #}
+ It would be a good candidate for pre-processing in the float plugin. #}
{% for net_overlay in net_overlays | sort if ('ip_' + net_overlay.name) in hostvars[inventory_hostname] %}
IPAddressAllow={{ net_overlay.network }}
{% endfor %}
diff --git a/float/roles/float-base-docker/vars/podman.yml b/float/roles/float-base-docker/vars/podman.yml
new file mode 100644
index 0000000..4416871
--- /dev/null
+++ b/float/roles/float-base-docker/vars/podman.yml
@@ -0,0 +1,5 @@
+---
+
+podman_default_package_source_by_distro:
+ stretch: ai
+ buster: ai
diff --git a/float/roles/float-base-net-overlay/tasks/configure_netoverlay_tinc.yml b/float/roles/float-base-net-overlay/tasks/configure_netoverlay_tinc.yml
index df0e0fc..c3b7746 100644
--- a/float/roles/float-base-net-overlay/tasks/configure_netoverlay_tinc.yml
+++ b/float/roles/float-base-net-overlay/tasks/configure_netoverlay_tinc.yml
@@ -3,7 +3,11 @@
- set_fact:
tinc_net: "{{ item }}"
tinc_dir: "/etc/tinc/{{ item }}"
- tinc_netmask_cidr: 24
+ tinc_host_ip: "{{ hostvars[inventory_hostname]['ip_' + item] }}"
+ net_overlay_config: "{{ net_overlays | selectattr('name', 'eq', item) | first }}"
+
+- set_fact:
+ tinc_host_ip_cidr: "{{ tinc_host_ip }}/{{ net_overlay_config.network | regex_replace('^.*/', '') }}"
- name: Install the tinc package
apt:
@@ -18,11 +22,26 @@
args:
creates: "{{ tinc_dir }}/rsa_key.pub"
-- name: Generate tinc host configuration
- tinc_host_conf:
- overlay: "{{ tinc_net }}"
+- name: Fetch tinc host public key
+ slurp:
+ src: "{{ tinc_dir }}/rsa_key.pub"
+ register: tinc_host_public_key
check_mode: no
+- name: Generate tinc host configuration
+ set_fact:
+ tinc_host_config: |
+ {% for ip in ips %}
+ Address = {{ ip }}
+ {% endfor %}
+ Port = {{ net_overlay_config.port | default('655') }}
+ Cipher = {{ net_overlay_config.cipher | default('aes-128-cbc') }}
+ Digest = {{ net_overlay_config.digest | default('sha256') }}
+ Compression = {{ net_overlay_config.compression | default('0') }}
+ PMTU = {{ net_overlay_config.pmtu | default('1460') }}
+ Subnet = {{ tinc_host_ip }}/32
+ {{ tinc_host_public_key['content'] | b64decode }}
+
# Generate tinc host entries for all *other* hosts. Skip if for
# some reason (failures) we weren't able to fetch it.
- name: Install tinc host configuration
diff --git a/float/roles/float-base-net-overlay/templates/firewall/11net-overlay-raw.j2 b/float/roles/float-base-net-overlay/templates/firewall/11net-overlay-raw.j2
index 4526343..b8fb718 100644
--- a/float/roles/float-base-net-overlay/templates/firewall/11net-overlay-raw.j2
+++ b/float/roles/float-base-net-overlay/templates/firewall/11net-overlay-raw.j2
@@ -1,8 +1,8 @@
{% macro allow_host_ips(h, chain) %}
-{% for ip in hostvars[h]['ips'] | ansible.netcommon.ipv4 | sort %}
+{% for ip in hostvars[h]['ips'] | ansible.utils.ipv4 | sort %}
add_rule4 -A {{ chain }} -s {{ ip }} -j CT --notrack
{% endfor %}
-{% for ip in hostvars[h]['ips'] | ansible.netcommon.ipv6 | sort %}
+{% for ip in hostvars[h]['ips'] | ansible.utils.ipv6 | sort %}
add_rule6 -A {{ chain }} -s {{ ip }} -j CT --notrack
{% endfor %}
{% endmacro %}
diff --git a/float/roles/float-base-net-overlay/templates/tinc/tinc-up.j2 b/float/roles/float-base-net-overlay/templates/tinc/tinc-up.j2
index 538bcc7..d973ec6 100644
--- a/float/roles/float-base-net-overlay/templates/tinc/tinc-up.j2
+++ b/float/roles/float-base-net-overlay/templates/tinc/tinc-up.j2
@@ -1,4 +1,4 @@
#!/bin/sh
ip link set $INTERFACE up
-ip addr add {{ hostvars[inventory_hostname]['ip_' + tinc_net] }}/{{ tinc_netmask_cidr }} dev $INTERFACE
+ip addr add {{ tinc_host_ip_cidr }} dev $INTERFACE
exit 0
diff --git a/float/roles/float-base-service-credentials/meta/main.yml b/float/roles/float-base-service-credentials/meta/main.yml
index a60e6df..8da7fb7 100644
--- a/float/roles/float-base-service-credentials/meta/main.yml
+++ b/float/roles/float-base-service-credentials/meta/main.yml
@@ -4,4 +4,6 @@ dependencies:
- role: float-util-credentials
vars:
credentials: "{{ float_host_service_credentials }}"
+ ca_tag: "x509"
+
diff --git a/float/roles/float-base/files/modprobe-hardening.conf b/float/roles/float-base/files/modprobe-hardening.conf
new file mode 100644
index 0000000..c0cd23f
--- /dev/null
+++ b/float/roles/float-base/files/modprobe-hardening.conf
@@ -0,0 +1,33 @@
+# Disable automatic conntrack helper assignment.
+options nf_conntrack nf_conntrack_helper=0
+
+# Obscure network protocols with a bad security track record.
+install dccp /bin/false
+install sctp /bin/false
+install rds /bin/false
+install tipc /bin/false
+install n-hdlc /bin/false
+install ax25 /bin/false
+install netrom /bin/false
+install x25 /bin/false
+install rose /bin/false
+install decnet /bin/false
+install econet /bin/false
+install af_802154 /bin/false
+install ipx /bin/false
+install appletalk /bin/false
+install psnap /bin/false
+install p8023 /bin/false
+install p8022 /bin/false
+install can /bin/false
+install atm /bin/false
+
+# Obscure filesystems.
+install cramfs /bin/false
+install freevxfs /bin/false
+install jffs2 /bin/false
+install hfs /bin/false
+install hfsplus /bin/false
+install squashfs /bin/false
+install udf /bin/false
+
diff --git a/float/roles/float-base/files/node-exporter-freeipmi.awk b/float/roles/float-base/files/node-exporter-freeipmi.awk
new file mode 100644
index 0000000..a35e396
--- /dev/null
+++ b/float/roles/float-base/files/node-exporter-freeipmi.awk
@@ -0,0 +1,87 @@
+#!/bin/awk -f
+
+function export(values, name) {
+ if (values["metric_count"] < 1) {
+ return
+ }
+ delete values["metric_count"]
+
+ printf("# HELP %s%s %s sensor reading from freeipmi\n", namespace, name, help[name]);
+ printf("# TYPE %s%s gauge\n", namespace, name);
+ for (sensor in values) {
+ printf("%s%s{sensor=\"%s\"} %f\n", namespace, name, sensor, values[sensor]);
+ }
+}
+
+# Fields are separated by a vertical bar ('|'), with space padding.
+BEGIN {
+ FS = "[ ]*[|][ ]*";
+ namespace = "node_ipmi_";
+
+ # Friendly description of the type of sensor for HELP.
+ help["temperature_celsius"] = "Temperature";
+ help["volts"] = "Voltage";
+ help["amperes"] = "Current";
+ help["power_watts"] = "Power";
+ help["speed_rpm"] = "Fan";
+ help["status"] = "Chassis status";
+
+ temperature_celsius["metric_count"] = 0;
+ volts["metric_count"] = 0;
+ amperes["metric_count"] = 0;
+ power_watts["metric_count"] = 0;
+ speed_rpm["metric_count"] = 0;
+ status["metric_count"] = 0;
+}
+
+# Skip lines with fewer than 3 fields: they are not valid sensor lines.
+{
+ if (NF < 3) {
+ next
+ }
+}
+
+# $4 is the value field; skip sensors whose reading is N/A.
+$4 ~ /N\/A/ {
+ next
+}
+
+# $5 is units field.
+$5 ~ /C/ {
+ temperature_celsius[$2] = $4;
+ temperature_celsius["metric_count"]++;
+}
+
+$5 ~ /V/ {
+ volts[$2] = $4;
+ volts["metric_count"]++;
+}
+
+$5 ~ /A/ {
+ amperes[$2] = $4;
+ amperes["metric_count"]++;
+}
+
+$5 ~ /W/ {
+ power_watts[$2] = $4;
+ power_watts["metric_count"]++;
+}
+
+$5 ~ /RPM/ {
+ speed_rpm[$2] = $4;
+ speed_rpm["metric_count"]++;
+}
+
+$2 ~ /Chassis/ {
+ status[$2] = sprintf("%d", substr($4,3,2));
+ status["metric_count"]++;
+}
+
+END {
+ export(temperature_celsius, "temperature_celsius");
+ export(volts, "volts");
+ export(amperes, "amperes");
+ export(power_watts, "power_watts");
+ export(speed_rpm, "speed_rpm");
+ export(status, "status");
+}
diff --git a/float/roles/float-base/files/node-exporter-scripts/freeipmi.sh b/float/roles/float-base/files/node-exporter-scripts/freeipmi.sh
new file mode 100644
index 0000000..2ad1f81
--- /dev/null
+++ b/float/roles/float-base/files/node-exporter-scripts/freeipmi.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+ipmi-sensors | awk -f /usr/lib/float/node-exporter-freeipmi.awk
diff --git a/float/roles/float-base/files/node-exporter-scripts/smartmon.py b/float/roles/float-base/files/node-exporter-scripts/smartmon.py
index 3dd0c8f..8980e20 100644
--- a/float/roles/float-base/files/node-exporter-scripts/smartmon.py
+++ b/float/roles/float-base/files/node-exporter-scripts/smartmon.py
@@ -232,16 +232,13 @@ def device_smart_capabilities(device):
(bool): True whenever SMART is available, False otherwise.
(bool): True whenever SMART is enabled, False otherwise.
"""
- groups = device_info(device)
-
- state = {
- g[1].split(' ', 1)[0]
- for g in groups if g[0] == 'SMART support'}
-
- smart_available = 'Available' in state
- smart_enabled = 'Enabled' in state
-
- return smart_available, smart_enabled
+ try:
+ subprocess.check_call(
+ ['/usr/sbin/smartctl', '--info'] + device.smartctl_select(),
+ stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+ return True, True
+ except subprocess.CalledProcessError:
+ return False, False
def collect_device_info(device):
diff --git a/float/roles/float-base/tasks/apt.yml b/float/roles/float-base/tasks/apt.yml
index c592867..7923add 100644
--- a/float/roles/float-base/tasks/apt.yml
+++ b/float/roles/float-base/tasks/apt.yml
@@ -45,6 +45,15 @@
- "deb http://deb.autistici.org/urepo ai3/"
- "deb http://deb.autistici.org/urepo buster-podman/"
+- set_fact:
+ apt_debian_components:
+ - main
+ - contrib
+ - non-free
+- set_fact:
+ apt_debian_components: "{{ apt_debian_components + ['non-free-firmware'] }}"
+ when: "float_debian_dist not in ('buster', 'bullseye')"
+
- name: Install our standard sources.list
template:
src: "sources.list.j2"
@@ -59,6 +68,7 @@
- stretch
- buster
- bullseye
+ - bookworm
- name: Run apt update
apt:
@@ -78,9 +88,9 @@
# When testing, try to make dpkg faster by disabling fsync.
- name: Speed up dpkg
- apt:
- name: dpkg-eatmydata
- state: present
+ copy:
+ dest: "/etc/dpkg/dpkg.cfg.d/no-sync"
+ content: "force-unsafe-io\n"
when: "testing|default(True)"
# Remove legacy stretch/buster mtail package pin.
@@ -89,53 +99,63 @@
path: "/etc/apt/preferences.d/99float-syslog"
state: absent
-- name: Install base packages
- apt:
- name: "{{ packages }}"
- state: present
- vars:
- packages:
+- set_fact:
+ extra_packages: []
+ ssh_packages: []
+ base_packages:
+ # Standard Debian packages
+ - acpid
+ - auditd
- ca-certificates
- - unattended-upgrades
- - systemd-coredump
- - rsync
+ - curl
- git
+ - gpg
+ - jq
+ - lsof
+ - mtail
- ntp
- openssl
- - curl
- - lsof
+ - prometheus-node-exporter
+ - prometheus-node-exporter-collectors
+ - rsync
+ - rsyslog
+ - rsyslog-exporter
+ - rsyslog-relp
+ - rsyslog-openssl
+ - systemd-coredump
+ - unattended-upgrades
+ - zstd
+
+ # Custom packages
+ - assetmon
+ - audisp-json
- cgroups-exporter
+ - firewall
+ - litestream
- logcat
- - tabacco
- restic
- - litestream
- runcron
- - acpid
- - zstd
- - man-db
- - jq
- - gpg
- - firewall
- - rsyslog
- - rsyslog-relp
- - rsyslog-exporter
- - mtail
- - auditd
- - audisp-json
- - prometheus-node-exporter
- - prometheus-node-exporter-collectors
- - assetmon
+ - tabacco
-- name: Install extra packages
- apt:
- name: "{{ extra_packages }}"
- state: present
- vars:
+- set_fact:
+ ssh_packages:
+ - ssh-key-wtmp
+ when: "enable_ssh and float_debian_dist != 'bullseye'"
+
+- set_fact:
extra_packages:
- net-tools
- vim
when: "not testing|default(True)"
+- set_fact:
+ all_packages: "{{ base_packages + ssh_packages + extra_packages }}"
+
+- name: Install packages
+ apt:
+ name: "{{ all_packages }}"
+ state: present
+
- name: Remove blacklisted packages
apt:
name: "{{ packages }}"
diff --git a/float/roles/float-base/tasks/harden.yml b/float/roles/float-base/tasks/harden.yml
index 3202889..4dd4db9 100644
--- a/float/roles/float-base/tasks/harden.yml
+++ b/float/roles/float-base/tasks/harden.yml
@@ -103,3 +103,8 @@
- 'disable-kmod-load.service'
ignore_errors: "{{ ansible_check_mode }}"
+- name: Configure module options and blocklists
+ copy:
+ src: "modprobe-hardening.conf"
+ dest: "/etc/modprobe.d/security.conf"
+
diff --git a/float/roles/float-base/tasks/ipmi.yml b/float/roles/float-base/tasks/ipmi.yml
index 6367e07..c530db2 100644
--- a/float/roles/float-base/tasks/ipmi.yml
+++ b/float/roles/float-base/tasks/ipmi.yml
@@ -1,15 +1,25 @@
---
-- name: Install ipmitool packages and dependency
+- name: Install freeipmi packages and dependency
apt:
name: "{{ packages }}"
state: present
vars:
packages:
- - ipmitool
- - gawk # prometheus-node-exporter-ipmitool-sensor dependency
+ - freeipmi-tools
+ - gawk # node-exporter-freeipmi.awk dependency
-- name: Enable prometheus node-exporter ipmitool sensor
+- name: Remove ipmitool, not used anymore
+ apt:
+ name: ipmitool
+ state: absent
+
+- name: Disable prometheus node-exporter ipmitool sensor
systemd:
name: prometheus-node-exporter-ipmitool-sensor.timer
- state: started
- enabled: yes
+ state: stopped
+ enabled: no
+
+- name: Install freeipmi node-exporter script
+ copy:
+ src: "node-exporter-freeipmi.awk"
+ dest: "/usr/lib/float/node-exporter-freeipmi.awk"
diff --git a/float/roles/float-base/tasks/main.yml b/float/roles/float-base/tasks/main.yml
index af7e332..41b65d5 100644
--- a/float/roles/float-base/tasks/main.yml
+++ b/float/roles/float-base/tasks/main.yml
@@ -40,6 +40,14 @@
- include_tasks: rollback_protection.yml
when: "git_revision != 'none' and not testing|default(True)"
+# Detect virtual machines / physical hardware (sysfs values end in a newline, hence the trim).
+- name: Detect virtual machine
+  slurp:
+    src: "/sys/class/dmi/id/sys_vendor"
+  register: slurp_sysfs_dmi_vendor
+- set_fact:
+    float_is_vm: "{{ slurp_sysfs_dmi_vendor['content'] | b64decode | trim == 'QEMU' }}"
+
# Create the /usr/lib/float and /var/lib/float directories for
# internal scripts.
- file:
@@ -94,5 +102,7 @@
- include_tasks: ipmi.yml
when: ipmi_device.stat.exists == true
+- include_tasks: systemd.yml
+
# Finally run some cleanups.
- import_tasks: cleanup.yml
diff --git a/float/roles/float-base/tasks/rollback_protection.yml b/float/roles/float-base/tasks/rollback_protection.yml
index 1d42d82..4de38a4 100644
--- a/float/roles/float-base/tasks/rollback_protection.yml
+++ b/float/roles/float-base/tasks/rollback_protection.yml
@@ -50,6 +50,7 @@
float with "-e rollback=true".
when: "commit_guard_stat.stat.exists and commit_compare.rc != 0 and not skip_rollback_protection"
-- copy:
+- name: Update current git revision
+ copy:
dest: /etc/.float-ansible-commit
content: "{{ git_revision }}\n"
diff --git a/float/roles/float-base/tasks/ssh.yml b/float/roles/float-base/tasks/ssh.yml
index 8b0fbee..e19165d 100644
--- a/float/roles/float-base/tasks/ssh.yml
+++ b/float/roles/float-base/tasks/ssh.yml
@@ -70,11 +70,13 @@
path: /etc/ssh/authorized_keys
state: directory
+- float_authorized_keys: {}
+
# Configure root's authorized_keys with the admin keys.
- name: Install admin public keys
authorized_key:
user: root
- key: "{% if emergency_ssh_key %}{{ emergency_ssh_key }}\n{% endif %}{% for a in admins %}{% for k in a.get('ssh_keys', []) %}{{ k }}\n{% endfor %}{% endfor %}"
+ key: "{% if emergency_ssh_key %}{{ emergency_ssh_key }}\n{% endif %}{{ float_authorized_keys }}"
path: /etc/ssh/authorized_keys/root
manage_dir: no
state: present
diff --git a/float/roles/float-base/tasks/systemd.yml b/float/roles/float-base/tasks/systemd.yml
new file mode 100644
index 0000000..896d5fd
--- /dev/null
+++ b/float/roles/float-base/tasks/systemd.yml
@@ -0,0 +1,13 @@
+---
+
+# Find the systemd units matching locally running services, and ensure
+# that they are set to always restart.
+- set_fact:
+ local_systemd_units: "{{ float_enabled_services | map('extract', services) | rejectattr('systemd_services', 'undefined') | map(attribute='systemd_services') | flatten | reject('search', '^docker-') }}"
+- name: Fix systemd services to autorestart
+ include_role:
+ name: float-util-systemd-custom-snippet
+ vars:
+ systemd_unit: "{{ item }}"
+ fix_restart: true
+ loop: "{{ local_systemd_units }}"
diff --git a/float/roles/float-base/templates/firewall/10float.j2 b/float/roles/float-base/templates/firewall/10float.j2
index e8888a4..48c8894 100644
--- a/float/roles/float-base/templates/firewall/10float.j2
+++ b/float/roles/float-base/templates/firewall/10float.j2
@@ -2,10 +2,10 @@
# specific sets of hosts.
{% macro allow_host_ips(h, chain) %}
-{% for ip in hostvars[h]['ips'] | ansible.netcommon.ipv4 | sort %}
+{% for ip in hostvars[h]['ips'] | ansible.utils.ipv4 | sort %}
add_rule4 -A {{ chain }} -s {{ ip }} -j ACCEPT
{% endfor %}
-{% for ip in hostvars[h]['ips'] | ansible.netcommon.ipv6 | sort %}
+{% for ip in hostvars[h]['ips'] | ansible.utils.ipv6 | sort %}
add_rule6 -A {{ chain }} -s {{ ip }} -j ACCEPT
{% endfor %}
{% endmacro %}
diff --git a/float/roles/float-base/templates/rsyslog.conf.j2 b/float/roles/float-base/templates/rsyslog.conf.j2
index a9de870..e4b9063 100644
--- a/float/roles/float-base/templates/rsyslog.conf.j2
+++ b/float/roles/float-base/templates/rsyslog.conf.j2
@@ -1,5 +1,6 @@
global(
maxMessageSize="64k"
+ defaultNetstreamDriver="ossl"
)
module(load="imuxsock"
@@ -30,7 +31,12 @@ ruleset(name="process_stats") {
action(
type="omprog"
name="to_exporter"
+{% if float_debian_dist in ('buster', 'bullseye') %}
binary="/usr/bin/rsyslog_exporter -web.listen-address=:9106"
+{% else %}
+ binary="/usr/bin/rsyslog_exporter -silent -web.listen-address=:9106"
+{% endif %}
+
queue.type="linkedlist"
queue.workerThreads="1"
)
diff --git a/float/roles/float-base/templates/sources.list.j2 b/float/roles/float-base/templates/sources.list.j2
index 939e9ed..dc0a5c6 100644
--- a/float/roles/float-base/templates/sources.list.j2
+++ b/float/roles/float-base/templates/sources.list.j2
@@ -1,5 +1,5 @@
{% if apt_sources_list_override is defined %}{{ apt_sources_list_override }}{% else %}
-deb http://deb.debian.org/debian {{ float_debian_dist }} main contrib non-free
-deb http://deb.debian.org/debian {{ float_debian_dist }}-updates main contrib non-free
-deb http://security.debian.org/debian-security {{ float_debian_dist }}-security main contrib non-free
+deb http://deb.debian.org/debian {{ float_debian_dist }} {{ apt_debian_components | join(' ') }}
+deb http://deb.debian.org/debian {{ float_debian_dist }}-updates {{ apt_debian_components | join(' ') }}
+deb http://security.debian.org/debian-security {{ float_debian_dist }}-security {{ apt_debian_components | join(' ') }}
{% endif %}
diff --git a/float/roles/float-base/templates/ssh/sshd_config.j2 b/float/roles/float-base/templates/ssh/sshd_config.j2
index 73a5610..beae646 100644
--- a/float/roles/float-base/templates/ssh/sshd_config.j2
+++ b/float/roles/float-base/templates/ssh/sshd_config.j2
@@ -2,9 +2,7 @@
# See the sshd_config(5) manpage for details
Port {{ ssh_port }}
-#ListenAddress ::
-#ListenAddress 0.0.0.0
-Protocol 2
+AddressFamily any
# HostKeys for protocol version 2
{% for key_type in ssh_host_key_types %}
@@ -12,11 +10,6 @@ HostKey /etc/ssh/ssh_host_{{ key_type }}_key
HostCertificate /etc/ssh/ssh_host_{{ key_type }}_key-cert.pub
{% endfor %}
-# Ciphers and MACs
-KexAlgorithms curve25519-sha256@libssh.org,diffie-hellman-group-exchange-sha256
-Ciphers chacha20-poly1305@openssh.com,aes256-gcm@openssh.com,aes128-gcm@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr
-MACs hmac-sha2-512-etm@openssh.com,hmac-sha2-256-etm@openssh.com,umac-128-etm@openssh.com,hmac-sha2-512,hmac-sha2-256,umac-128@openssh.com
-
# Logging. VERBOSE logs the fingerprint of keys used to login.
SyslogFacility AUTH
LogLevel VERBOSE
@@ -24,18 +17,20 @@ LogLevel VERBOSE
# Authentication:
StrictModes yes
AuthorizedKeysFile /etc/ssh/authorized_keys/%u
-PermitRootLogin without-password
+PermitRootLogin prohibit-password
PermitEmptyPasswords no
PubkeyAuthentication yes
UsePAM yes
+# Allow more attempts for people with many keys in their agent.
+MaxAuthTries 10
+
# Disable a bunch of features.
IgnoreRhosts yes
HostbasedAuthentication no
ChallengeResponseAuthentication no
PasswordAuthentication no
UseDNS no
-#IgnoreUserKnownHosts yes
# Makes ansible faster?
PrintMotd no
@@ -49,7 +44,15 @@ ClientAliveInterval 120
#AcceptEnv LANG LC_*
# Be restrictive on forwarding / proxying.
+
+# Disable agent forwarding for the clients' safety.
AllowAgentForwarding no
-AllowTcpForwarding no
-X11Forwarding no
-#PermitTunnel no
+
+# Reverse forwarding can lead to security issues due to manipulating
+# the network perimeter.
+AllowTcpForwarding local
+AllowStreamLocalForwarding local
+PermitListen none
+
+Subsystem sftp /usr/lib/openssh/sftp-server
+
diff --git a/float/roles/float-base/templates/sysctl.conf.j2 b/float/roles/float-base/templates/sysctl.conf.j2
index c28c31e..b06208e 100644
--- a/float/roles/float-base/templates/sysctl.conf.j2
+++ b/float/roles/float-base/templates/sysctl.conf.j2
@@ -49,6 +49,9 @@ net.netfilter.nf_conntrack_max={{ nf_conntrack_max }}
# Restrict core dumps for SUID binaries.
fs.suid_dumpable=0
+# Protect against time-wait assassination.
+net.ipv4.tcp_rfc1337=1
+
# Disable source routed packet acceptance.
net.ipv4.conf.all.accept_source_route=0
net.ipv4.conf.default.accept_source_route=0
@@ -77,6 +80,9 @@ net.ipv4.conf.default.rp_filter=1
# https://lore.kernel.org/patchwork/patch/1034150
dev.tty.ldisc_autoload=0
+# Restrict userfaultfd() syscall to the CAP_SYS_PTRACE capability.
+vm.unprivileged_userfaultfd=0
+
# Additional protections for fifos, hardlinks, regular files, and symlinks
# https://patchwork.kernel.org/patch/10244781
# slightly tightened up from the systemd default values of "1" for each
@@ -114,9 +120,7 @@ kernel.sysrq=0
# (linux-hardened default)
net.core.bpf_jit_harden=2
kernel.unprivileged_bpf_disabled=1
-{% endif %}
-{% if not disable_restricted_sysctl %}
# Disable unprivileged user namespaces
# https://lwn.net/Articles/673597
# (linux-hardened default)
diff --git a/float/roles/float-base/templates/vhostmap.prom.j2 b/float/roles/float-base/templates/vhostmap.prom.j2
index 4444450..08ea619 100644
--- a/float/roles/float-base/templates/vhostmap.prom.j2
+++ b/float/roles/float-base/templates/vhostmap.prom.j2
@@ -6,7 +6,7 @@ Skip public_endpoints with a path, to avoid duplication of entries.
#}
{% for service_name, service in services | dictsort %}
-{% for ep in service.get('public_endpoints', []) %}
+{% for ep in service.get('public_endpoints', []) if ep.get('path', '/') == '/' %}
{% for systemd_service in service.get('systemd_services', []) %}
{% for d in domain_public %}
{% if ep.sharded | default(False) %}
diff --git a/float/roles/float-base/vars/main.yml b/float/roles/float-base/vars/main.yml
index 7a45a63..dc6941a 100644
--- a/float/roles/float-base/vars/main.yml
+++ b/float/roles/float-base/vars/main.yml
@@ -1,5 +1,5 @@
---
# Define the 'rollback' variable to bypass rollback protection.
-skip_rollback_protection: "{{ rollback | default(False) | bool }}"
+skip_rollback_protection: "{{ rollback | default(False) }}"
diff --git a/float/roles/float-infra-acme/tasks/main.yml b/float/roles/float-infra-acme/tasks/main.yml
index 07e922b..9c00041 100644
--- a/float/roles/float-infra-acme/tasks/main.yml
+++ b/float/roles/float-infra-acme/tasks/main.yml
@@ -31,7 +31,16 @@
state: directory
owner: acmeserver
group: acmeserver
- mode: 0700
+ mode: "0700"
+
+- name: Install ACME private key
+ copy:
+ content: "{{ acme_private_key }}\n"
+ dest: "/var/lib/acme/account.key"
+ owner: acmeserver
+ group: acmeserver
+ mode: "0600"
+ when: acme_private_key is defined
- name: Add the acmeserver user to the public-credentials and acme-credentials group
user:
diff --git a/float/roles/float-infra-admin-dashboard/handlers/main.yml b/float/roles/float-infra-admin-dashboard/handlers/main.yml
deleted file mode 100644
index 9cb836e..0000000
--- a/float/roles/float-infra-admin-dashboard/handlers/main.yml
+++ /dev/null
@@ -1,6 +0,0 @@
----
-
-- name: reload admin-dashboard
- systemd:
- name: docker-admin-dashboard-http
- state: restarted
diff --git a/float/roles/float-infra-dns/defaults/main.yml b/float/roles/float-infra-dns/defaults/main.yml
index a66e91e..7b7ddb5 100644
--- a/float/roles/float-infra-dns/defaults/main.yml
+++ b/float/roles/float-infra-dns/defaults/main.yml
@@ -4,3 +4,7 @@
# By default, this is the first public domain.
mx_ns_domain: "{{ domain_public[0] }}"
+# The default CAA record for all zones points to LE since that is what
+# 'acmeserver' uses.
+dns_caa_record: '0 issue "letsencrypt.org"'
+
diff --git a/float/roles/float-infra-dns/templates/bind/named.conf.options b/float/roles/float-infra-dns/templates/bind/named.conf.options
index fb34501..b0ace7b 100644
--- a/float/roles/float-infra-dns/templates/bind/named.conf.options
+++ b/float/roles/float-infra-dns/templates/bind/named.conf.options
@@ -10,7 +10,7 @@ options {
{% if float_limit_bind_to_known_interfaces | default(False) %}
listen-on {
127.0.0.1;
-{% for ip in ips | ansible.netcommon.ipv4 | sort %}
+{% for ip in ips | ansible.utils.ipv4 | sort %}
{{ ip }};
{% endfor %}
{% for n in net_overlays | sort if ('ip_' + n.name) in hostvars[inventory_hostname] %}
@@ -19,7 +19,7 @@ options {
};
listen-on-v6 {
::1;
-{% for ip in ips | ansible.netcommon.ipv6 | sort %}
+{% for ip in ips | ansible.utils.ipv6 | sort %}
{{ ip }};
{% endfor %}
};
@@ -39,6 +39,12 @@ options {
// Conform to RFC1035.
auth-nxdomain no;
+ // Increase tcp-client limit from default, and prevent
+ // idle connections from hanging around.
+ tcp-clients 2000;
+ tcp-idle-timeout 50;
+ tcp-keepalive-timeout 50;
+
allow-transfer { none; };
allow-query {
localhost;
diff --git a/float/roles/float-infra-dns/templates/dns/infra.yml b/float/roles/float-infra-dns/templates/dns/infra.yml
index b104d31..dce0d69 100644
--- a/float/roles/float-infra-dns/templates/dns/infra.yml
+++ b/float/roles/float-infra-dns/templates/dns/infra.yml
@@ -3,8 +3,8 @@
"@ns":
_:
{% for h in services['dns'].hosts | sort %}
-{% set host_ip4 = hostvars[h]['public_ips'] | ansible.netcommon.ipv4 %}
-{% set host_ip6 = hostvars[h]['public_ips'] | ansible.netcommon.ipv6 %}
+{% set host_ip4 = hostvars[h]['public_ips'] | ansible.utils.ipv4 %}
+{% set host_ip6 = hostvars[h]['public_ips'] | ansible.utils.ipv6 %}
{% if host_ip4 %}
- NS ns{{ loop.index }}.{{ mx_ns_domain }}.
{% endif %}
@@ -12,6 +12,9 @@
- NS ns{{ loop.index }}-v6.{{ mx_ns_domain }}.
{% endif %}
{% endfor %}
+{% if dns_caa_record is defined %}
+ - "CAA {{ dns_caa_record | regex_replace('\"', '\\\"') }}"
+{% endif %}
"@base":
EXTENDS: "@ns"
@@ -32,8 +35,8 @@
# The explicit NS delegation for 'l' is necessary for dnssec-sign to work properly.
l:
{% for h in services['dns'].hosts | sort %}
-{% set host_ip4 = hostvars[h]['public_ips'] | ansible.netcommon.ipv4 %}
-{% set host_ip6 = hostvars[h]['public_ips'] | ansible.netcommon.ipv6 %}
+{% set host_ip4 = hostvars[h]['public_ips'] | ansible.utils.ipv4 %}
+{% set host_ip6 = hostvars[h]['public_ips'] | ansible.utils.ipv6 %}
{% if host_ip4 %}
- NS ns{{ loop.index }}.{{ mx_ns_domain }}.
{% endif %}
@@ -45,8 +48,8 @@
{% if d == mx_ns_domain %}
{# Only generate the nameservers' A records on the chosen zone #}
{% for h in services['dns'].hosts | sort %}
-{% set host_ip4 = hostvars[h]['public_ips'] | ansible.netcommon.ipv4 %}
-{% set host_ip6 = hostvars[h]['public_ips'] | ansible.netcommon.ipv6 %}
+{% set host_ip4 = hostvars[h]['public_ips'] | ansible.utils.ipv4 %}
+{% set host_ip6 = hostvars[h]['public_ips'] | ansible.utils.ipv6 %}
{% if host_ip4 %}
ns{{ loop.index }}: {{ host_ip4 | to_json }}
{% endif %}
diff --git a/float/roles/float-infra-dns/templates/zonetool.yml b/float/roles/float-infra-dns/templates/zonetool.yml
index 4f6fcbd..b80c198 100644
--- a/float/roles/float-infra-dns/templates/zonetool.yml
+++ b/float/roles/float-infra-dns/templates/zonetool.yml
@@ -1,6 +1,6 @@
---
{% set all_ips = services['frontend'].hosts | map('extract', hostvars) | rejectattr('traffic', 'false') | map(attribute='public_ips') | reject('undefined') | flatten %}
-FRONTENDS4: {{ all_ips | ansible.netcommon.ipv4 | list | to_json }}
+FRONTENDS4: {{ all_ips | ansible.utils.ipv4 | list | to_json }}
-FRONTENDS6: {{ all_ips | ansible.netcommon.ipv6 | list | to_json }}
+FRONTENDS6: {{ all_ips | ansible.utils.ipv6 | list | to_json }}
diff --git a/float/roles/float-infra-haproxy/templates/haproxy.cfg.j2 b/float/roles/float-infra-haproxy/templates/haproxy.cfg.j2
index ef3c340..ad75398 100644
--- a/float/roles/float-infra-haproxy/templates/haproxy.cfg.j2
+++ b/float/roles/float-infra-haproxy/templates/haproxy.cfg.j2
@@ -26,7 +26,9 @@ defaults
frontend stats
bind :::8404
mode http
+{% if float_debian_dist in ('buster', 'bullseye') %}
option http-use-htx
+{% endif %}
http-request use-service prometheus-exporter if { path /metrics }
stats enable
stats uri /stats
diff --git a/float/roles/float-infra-log-collector/defaults/main.yml b/float/roles/float-infra-log-collector/defaults/main.yml
index ff97a7f..46142d7 100644
--- a/float/roles/float-infra-log-collector/defaults/main.yml
+++ b/float/roles/float-infra-log-collector/defaults/main.yml
@@ -3,8 +3,6 @@
# Whether to set up elasticsearch/kibana at all.
enable_elasticsearch: true
-es_major_version: "6"
-
# This number is very low and only useful for the testing environment.
es_heap_size: "166m"
diff --git a/float/roles/float-infra-log-collector/tasks/main.yml b/float/roles/float-infra-log-collector/tasks/main.yml
index c86da8d..e9bb796 100644
--- a/float/roles/float-infra-log-collector/tasks/main.yml
+++ b/float/roles/float-infra-log-collector/tasks/main.yml
@@ -49,5 +49,14 @@
template:
src: "rsyslog-collector.conf.j2"
dest: "/etc/rsyslog-collector.conf"
+ vars:
+ rsyslog_port: 6514
+ rsyslog_exporter_port: 9105
+ rsyslog_elasticsearch_host: "127.0.0.1"
+ rsyslog_elasticsearch_port: 9200
+ rsyslog_tls_ca: "/etc/credentials/x509/log-collector/ca.pem"
+ rsyslog_tls_cert: "/etc/credentials/x509/log-collector/server/cert.pem"
+ rsyslog_tls_key: "/etc/credentials/x509/log-collector/server/private_key.pem"
+ rsyslog_tls_permittedpeer: "*.{{ domain }}"
notify: "restart rsyslog-collector"
diff --git a/float/roles/float-infra-log-collector/templates/rsyslog-collector.conf.j2 b/float/roles/float-infra-log-collector/templates/rsyslog-collector.conf.j2
index 9adc7e1..f96c854 100644
--- a/float/roles/float-infra-log-collector/templates/rsyslog-collector.conf.j2
+++ b/float/roles/float-infra-log-collector/templates/rsyslog-collector.conf.j2
@@ -1,6 +1,8 @@
global(
maxMessageSize="64k"
+ workDirectory="/var/spool/rsyslog"
+ defaultNetstreamDriver="ossl"
)
main_queue(
@@ -24,7 +26,7 @@ ruleset(name="process_stats") {
action(
type="omprog"
name="to_exporter"
- binary="/usr/bin/rsyslog_exporter -web.listen-address=:9105"
+ binary="/usr/bin/rsyslog_exporter -web.listen-address=:{{ rsyslog_exporter_port }} -silent"
queue.type="linkedlist"
queue.workerThreads="1"
)
@@ -165,8 +167,8 @@ ruleset(name="incoming"){
if ($syslogfacility-text == "auth" and $programname == "audit") then {
# Structured audit logs go to a dedicated Elasticsearch index.
action(type="omelasticsearch"
- server="127.0.0.1"
- serverport="9200"
+ server="{{ rsyslog_elasticsearch_host }}"
+ serverport="{{ rsyslog_elasticsearch_port }}"
template="esTemplateAudit"
searchIndex="esIndexAudit"
searchType="_doc"
@@ -179,7 +181,6 @@ ruleset(name="incoming"){
queue.mindequeuebatchsize="100"
queue.mindequeuebatchsize.timeout="3000"
queue.filename="es-audit"
- queue.spoolDirectory="/var/spool/rsyslog"
action.resumeretrycount="-1")
} else {
# Extension point for rules applying to structured logs.
@@ -191,8 +192,8 @@ ruleset(name="incoming"){
# Normal structured log present in the default syslog flow. Send
# straight to Elasticsearch, skipping the log normalization step.
action(type="omelasticsearch"
- server="127.0.0.1"
- serverport="9200"
+ server="{{ rsyslog_elasticsearch_host }}"
+ serverport="{{ rsyslog_elasticsearch_port }}"
template="esTemplateJSON"
searchIndex="esIndex"
searchType="_doc"
@@ -205,7 +206,6 @@ ruleset(name="incoming"){
queue.mindequeuebatchsize="100"
queue.mindequeuebatchsize.timeout="3000"
queue.filename="es-structured"
- queue.spoolDirectory="/var/spool/rsyslog"
action.resumeretrycount="-1")
}
} else if ($syslogfacility-text == "local3") then {
@@ -219,8 +219,8 @@ ruleset(name="incoming"){
set $!request = "/sso_login?";
}
action(type="omelasticsearch"
- server="127.0.0.1"
- serverport="9200"
+ server="{{ rsyslog_elasticsearch_host }}"
+ serverport="{{ rsyslog_elasticsearch_port }}"
template="esTemplateHTTP"
searchIndex="esIndexHTTP"
searchType="_doc"
@@ -233,7 +233,6 @@ ruleset(name="incoming"){
queue.mindequeuebatchsize="100"
queue.mindequeuebatchsize.timeout="3000"
queue.filename="es-http"
- queue.spoolDirectory="/var/spool/rsyslog"
action.resumeretrycount="-1")
} else {
# Traditional syslog message. Run it through mmnormalize to
@@ -268,8 +267,8 @@ ruleset(name="incoming"){
# valid and ES will refuse it.
set $!ignore = "1";
action(type="omelasticsearch"
- server="127.0.0.1"
- serverport="9200"
+ server="{{ rsyslog_elasticsearch_host }}"
+ serverport="{{ rsyslog_elasticsearch_port }}"
template="esTemplate"
searchIndex="esIndex"
searchType="_doc"
@@ -282,7 +281,6 @@ ruleset(name="incoming"){
queue.mindequeuebatchsize="100"
queue.mindequeuebatchsize.timeout="3000"
queue.filename="es-default"
- queue.spoolDirectory="/var/spool/rsyslog"
action.resumeretrycount="-1")
}
{% endif %}
@@ -295,14 +293,14 @@ module(
input(
type="imrelp"
- port="6514"
+ port="{{ rsyslog_port }}"
maxDataSize="64k"
ruleset="incoming"
tls="on"
tls.compression="on"
- tls.cacert="/etc/credentials/x509/log-collector/ca.pem"
- tls.mycert="/etc/credentials/x509/log-collector/server/cert.pem"
- tls.myprivkey="/etc/credentials/x509/log-collector/server/private_key.pem"
- tls.permittedpeer="*.{{ domain }}"
+ tls.cacert="{{ rsyslog_tls_ca }}"
+ tls.mycert="{{ rsyslog_tls_cert }}"
+ tls.myprivkey="{{ rsyslog_tls_key }}"
+ tls.permittedpeer="{{ rsyslog_tls_permittedpeer }}"
tls.authmode="certvalid"
)
diff --git a/float/roles/float-infra-nginx/handlers/main.yml b/float/roles/float-infra-nginx/handlers/main.yml
index b41c1ff..397098e 100644
--- a/float/roles/float-infra-nginx/handlers/main.yml
+++ b/float/roles/float-infra-nginx/handlers/main.yml
@@ -11,3 +11,9 @@
systemd:
name: firewall.service
state: restarted
+
+- name: reload mtail
+ systemd:
+ name: mtail.service
+ state: restarted
+
diff --git a/float/roles/float-infra-nginx/meta/main.yml b/float/roles/float-infra-nginx/meta/main.yml
index e57ebd7..ccc8705 100644
--- a/float/roles/float-infra-nginx/meta/main.yml
+++ b/float/roles/float-infra-nginx/meta/main.yml
@@ -4,4 +4,4 @@ dependencies:
- role: float-base-public-credentials
vars:
credentials_type: http
-
+ - role: float-util-tor-exits-dataset
diff --git a/float/roles/float-infra-nginx/tasks/nginx.yml b/float/roles/float-infra-nginx/tasks/nginx.yml
index 33a801a..e30b833 100644
--- a/float/roles/float-infra-nginx/tasks/nginx.yml
+++ b/float/roles/float-infra-nginx/tasks/nginx.yml
@@ -8,6 +8,7 @@
packages:
- sso-proxy
- nginx-full
+ - libnginx-mod-http-headers-more-filter
# SSO proxy setup.
- name: Configure /etc/default/sso-proxy
@@ -113,7 +114,7 @@
- "50-mod-http-upstream-fair.conf"
- "50-mod-http-xslt-filter.conf"
- "50-mod-mail.conf"
- - "50-mod-stream.conf"
+ notify: reload nginx
# Setup the HTTP router configuration.
- name: Configure NGINX (upstreams)
@@ -151,14 +152,14 @@
file:
path: /var/www/html/__errors
state: directory
- when: "nginx_install_custom_error_pages | bool"
+ when: nginx_install_custom_error_pages
- name: Copy custom error messages
copy:
src: "{{ item }}"
dest: /var/www/html/__errors/
with_fileglob: "errors/*"
- when: "nginx_install_custom_error_pages | bool"
+ when: nginx_install_custom_error_pages
# Create the cache directory.
- file:
@@ -191,6 +192,7 @@
template:
src: "nginx.mtail.j2"
dest: "/etc/mtail/nginx.mtail"
+ notify: reload mtail
# Misc cleanup of old files.
- name: Remove obsolete files
diff --git a/float/roles/float-infra-nginx/templates/config/accept.map b/float/roles/float-infra-nginx/templates/config/accept.map
new file mode 100644
index 0000000..b552866
--- /dev/null
+++ b/float/roles/float-infra-nginx/templates/config/accept.map
@@ -0,0 +1,4 @@
+map $http_accept $http_accept_simplified {
+ default $http_accept;
+ ~text/html html;
+}
diff --git a/float/roles/float-infra-nginx/templates/config/conf.d/gzip.conf b/float/roles/float-infra-nginx/templates/config/conf.d/gzip.conf
index d52e34c..87be0b7 100644
--- a/float/roles/float-infra-nginx/templates/config/conf.d/gzip.conf
+++ b/float/roles/float-infra-nginx/templates/config/conf.d/gzip.conf
@@ -18,5 +18,6 @@ gzip_types
image/svg+xml
image/x-icon
text/css
+ text/javascript
text/plain;
diff --git a/float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf b/float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf
index 68bbc39..7d31efc 100644
--- a/float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf
+++ b/float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf
@@ -20,8 +20,9 @@ proxy_redirect off;
proxy_http_version 1.1;
# Set up a global cache.
+include /etc/nginx/accept.map;
proxy_cache_path /var/cache/nginx levels=1:2 keys_zone=global:{{ nginx_cache_keys_mem }} max_size={{ nginx_cache_fs_size }} inactive=1d use_temp_path=off;
-proxy_cache_key "$scheme$host$request_uri$sent_http_content_language";
+proxy_cache_key "$scheme$host$request_uri$http_accept_simplified$sent_http_content_language";
proxy_no_cache $cookie_SSO $http_authorization;
proxy_cache_methods GET HEAD;
proxy_cache_valid 200 10m;
diff --git a/float/roles/float-infra-nginx/templates/config/nginx.conf b/float/roles/float-infra-nginx/templates/config/nginx.conf
index 15896af..8a7066e 100644
--- a/float/roles/float-infra-nginx/templates/config/nginx.conf
+++ b/float/roles/float-infra-nginx/templates/config/nginx.conf
@@ -1,3 +1,4 @@
+include /etc/nginx/modules-enabled/*.conf;
worker_processes auto;
worker_rlimit_nofile {{ nginx_worker_connections * 2 }};
diff --git a/float/roles/float-infra-nginx/templates/nginx.mtail.j2 b/float/roles/float-infra-nginx/templates/nginx.mtail.j2
index b7a292b..e8f414e 100644
--- a/float/roles/float-infra-nginx/templates/nginx.mtail.j2
+++ b/float/roles/float-infra-nginx/templates/nginx.mtail.j2
@@ -4,22 +4,25 @@ counter nginx_http_requests by host, vhost, method, code
counter nginx_http_requests_cache by host, vhost, cache_status
counter nginx_http_bytes by host, vhost, method, code
counter nginx_http_bytes_cache by host, vhost, cache_status
-counter nginx_http_requests_ms by le, host, vhost, method, code
+counter nginx_http_requests_ms by le, host, vhost, method
/(?P<hostname>[-0-9A-Za-z._:]+) nginx_access: (?P<vhost>[-0-9A-Za-z._:]+) \S+ (?P<remote_addr>[0-9a-f\.:]+) - - \[[^\]]+\] "(?P<request_method>[A-Z]+) (?P<request_uri>\S+) (?P<http_version>HTTP\/[0-9\.]+)" (?P<status>\d{3}) ((?P<response_size>\d+)|-) "[^"]*" "[^"]*" ([-0-9A-Za-z._:]+) ((?P<ups_resp_seconds>\d+\.\d+)|-) (?P<request_seconds>\d+)\.(?P<request_milliseconds>\d+) (?P<cache_status>\S+)/ {
nginx_http_request_total++
nginx_http_requests[$hostname][$vhost][$request_method][$status]++
- nginx_http_requests_cache[$hostname][$vhost][$cache_status]++
nginx_http_bytes[$hostname][$vhost][$request_method][$status] += $response_size
- nginx_http_bytes_cache[$hostname][$vhost][$cache_status] += $response_size
+
+ int($status) == 200 {
+ nginx_http_requests_cache[$hostname][$vhost][$cache_status]++
+ nginx_http_bytes_cache[$hostname][$vhost][$cache_status] += $response_size
{# 10ms-5s buckets, with factor=sqrt(2) #}
{% for bucket_ms in [10, 14, 20, 28, 40, 57, 80, 113, 160, 226, 320, 453, 640, 905, 1280, 1810, 2560, 3620, 5119] %}
- $request_seconds * 1000 + $request_milliseconds < {{ bucket_ms }} {
- nginx_http_requests_ms["{{ bucket_ms }}"][$hostname][$vhost][$request_method][$status]++
- }
+ $request_seconds * 1000 + $request_milliseconds < {{ bucket_ms }} {
+ nginx_http_requests_ms["{{ bucket_ms }}"][$hostname][$vhost][$request_method]++
+ }
{% endfor %}
- nginx_http_requests_ms["inf"][$hostname][$vhost][$request_method][$status]++
+ nginx_http_requests_ms["inf"][$hostname][$vhost][$request_method]++
+ }
}
diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/dns.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/dns.json
index 808c733..9c53f9e 100644
--- a/float/roles/float-infra-prometheus/templates/grafana/dashboards/dns.json
+++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/dns.json
@@ -3,21 +3,29 @@
"list": [
{
"builtIn": 1,
- "datasource": "-- Grafana --",
+ "datasource": {
+ "type": "datasource",
+ "uid": "grafana"
+ },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [],
+ "type": "dashboard"
+ },
"type": "dashboard"
}
]
},
"description": "Bind9 DNS Service Statistics.",
"editable": true,
+ "fiscalYearStartMonth": 0,
"gnetId": 12309,
- "graphTooltip": 0,
- "id": 27,
- "iteration": 1618527684190,
+ "graphTooltip": 1,
"links": [
{
"icon": "external link",
@@ -28,1646 +36,472 @@
"url": "https://github.com/pecastro/grafana-dashboards/blob/master/prometheus/bind9-exporter-dns.json"
}
],
+ "liveNow": false,
"panels": [
{
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 0
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
},
- "id": 19,
- "panels": [],
- "repeat": null,
- "title": "System",
- "type": "row"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "datasource": "${DS_PROMETHEUS}",
- "decimals": 1,
"fieldConfig": {
"defaults": {
- "custom": {}
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
},
"overrides": []
},
- "format": "s",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
"gridPos": {
- "h": 4,
- "w": 6,
+ "h": 8,
+ "w": 12,
"x": 0,
- "y": 1
- },
- "height": "150",
- "id": 1,
- "interval": null,
- "links": [],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "s ago",
- "postfixFontSize": "80%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "time() - max(bind_boot_time_seconds{instance=~\"$instance\"}) ",
- "interval": "5m",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A",
- "step": 600,
- "target": ""
- }
- ],
- "thresholds": "",
- "title": "Restarted",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "avg"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "datasource": "${DS_PROMETHEUS}",
- "decimals": 1,
- "fieldConfig": {
- "defaults": {
- "custom": {}
- },
- "overrides": []
- },
- "format": "s",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 4,
- "w": 6,
- "x": 6,
- "y": 1
+ "y": 0
},
- "height": "150px",
"id": 2,
- "interval": null,
- "links": [],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "s ago",
- "postfixFontSize": "80%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "time() - max(bind_config_time_seconds{instance=~\"$instance\"})",
- "interval": "5m",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A",
- "step": 600,
- "target": ""
- }
- ],
- "thresholds": "",
- "title": "Reconfigured",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "avg"
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "links": []
- },
- "overrides": []
- },
- "fill": 3,
- "fillGradient": 0,
- "gridPos": {
- "h": 4,
- "w": 12,
- "x": 12,
- "y": 1
- },
- "hiddenSeries": false,
- "id": 3,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 3,
- "links": [],
- "nullPointMode": "null",
"options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "increase(process_cpu_seconds_total{instance=~\"$instance\", job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "",
- "refId": "A",
- "step": 10,
- "target": ""
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Named CPU Time",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
},
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "links": []
- },
- "overrides": []
},
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
- "y": 5
- },
- "hiddenSeries": false,
- "id": 4,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 2,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "Max File Descriptors",
- "fill": 0
- }
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
"targets": [
{
- "expr": "process_max_fds{instance=~\"$instance\",job=\"$job\"}",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "Max",
- "refId": "A",
- "step": 10,
- "target": ""
- },
- {
- "expr": "process_open_fds{instance=~\"$instance\",job=\"$job\"}",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "Open",
- "refId": "B",
- "step": 10,
- "target": ""
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "File Descriptors",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 32,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr": "sum(rate(bind_incoming_queries_total{host=~\"$host\"}[$__rate_interval])) by ()",
+ "legendFormat": "qps",
+ "range": true,
+ "refId": "A"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "title": "Queries",
+ "type": "timeseries"
},
{
- "aliasColors": {
- "Resident": "#890F02",
- "Virtual": "#0A437C",
- "Virtual Memory": "#0A437C"
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
- "custom": {},
- "links": []
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
},
"overrides": []
},
- "fill": 2,
- "fillGradient": 0,
"gridPos": {
- "h": 7,
+ "h": 8,
"w": 12,
"x": 12,
- "y": 5
- },
- "hiddenSeries": false,
- "id": 5,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
+ "y": 0
},
- "lines": true,
- "linewidth": 3,
- "links": [],
- "nullPointMode": "null",
+ "id": 3,
"options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "process_virtual_memory_bytes{instance=~\"$instance\",job=\"$job\"}",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "Virtual",
- "refId": "A",
- "step": 10,
- "target": ""
- },
- {
- "expr": "process_resident_memory_bytes{instance=~\"$instance\",job=\"$job\"}",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "Resident",
- "refId": "B",
- "step": 10,
- "target": ""
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Memory",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
},
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "links": []
- },
- "overrides": []
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 12
- },
- "hiddenSeries": false,
- "id": 9,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
},
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
"targets": [
{
- "expr": "increase(bind_query_duplicates_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "Duplicates",
- "refId": "A",
- "step": 4,
- "target": ""
- },
- {
- "expr": "increase(bind_query_errors_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ error }}",
- "refId": "B",
- "step": 4,
- "target": ""
- },
- {
- "expr": "increase(bind_query_recursions_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "Recursions",
- "refId": "C",
- "step": 4,
- "target": ""
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Queries",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr": "sum(rate(bind_incoming_queries_total{host=~\"$host\"}[$__rate_interval])) by (host)",
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "title": "Queries (by host)",
+ "type": "timeseries"
},
{
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 19
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
},
- "id": 21,
- "panels": [],
- "repeat": null,
- "title": "Incoming",
- "type": "row"
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
- "custom": {},
- "links": []
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
},
"overrides": []
},
- "fill": 1,
- "fillGradient": 0,
"gridPos": {
- "h": 7,
+ "h": 8,
"w": 12,
"x": 0,
- "y": 20
- },
- "hiddenSeries": false,
- "id": 6,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {}
- ],
- "spaceLength": 10,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "irate(bind_incoming_queries_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ type }}",
- "refId": "A",
- "step": 10,
- "target": ""
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Incoming Queries",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "decimals": null,
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "decimals": -1,
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "links": []
- },
- "overrides": []
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 12,
- "y": 20
+ "y": 8
},
- "hiddenSeries": false,
- "id": 7,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
+ "id": 4,
"options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "irate(bind_incoming_requests_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ opcode }}",
- "refId": "A",
- "step": 10,
- "target": ""
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Incoming Request Opcodes",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
},
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "links": []
- },
- "overrides": []
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 27
},
- "hiddenSeries": false,
- "id": 8,
- "legend": {
- "alignAsTable": true,
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "rightSide": true,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": true,
- "steppedLine": false,
"targets": [
{
- "expr": "irate(bind_responses_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ result }}",
- "refId": "A",
- "step": 4,
- "target": ""
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Response Results",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr": "sum(rate(bind_resolver_queries_total{host=~\"$host\",view=~\"$view\"}[$__rate_interval])) by (host, view)",
+ "legendFormat": "{{host}}/{{view}}",
+ "range": true,
+ "refId": "A"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "title": "Resolver Queries",
+ "type": "timeseries"
},
{
- "collapsed": false,
- "datasource": null,
- "gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
- "y": 34
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
},
- "id": 23,
- "panels": [],
- "repeat": null,
- "title": "Resolver",
- "type": "row"
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
- "custom": {},
- "links": []
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ }
},
"overrides": []
},
- "fill": 1,
- "fillGradient": 0,
"gridPos": {
- "h": 7,
- "w": 24,
- "x": 0,
- "y": 35
- },
- "hiddenSeries": false,
- "id": 15,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 8
},
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
+ "id": 5,
"options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "irate(bind_resolver_response_errors_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ view }} / {{ error }}",
- "refId": "A",
- "step": 4,
- "target": ""
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
},
- {
- "expr": "irate(bind_resolver_response_lame_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ view }} / LAME",
- "refId": "B",
- "step": 4,
- "target": ""
- },
- {
- "expr": "irate(bind_resolver_response_mismatch_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ view }} / MISMATCH",
- "refId": "C",
- "step": 4,
- "target": ""
- },
- {
- "expr": "irate(bind_resolver_response_truncated_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ view }} / TRUNCATED",
- "refId": "D",
- "step": 4,
- "target": ""
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
}
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Resolver Response Errors",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
},
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "links": []
- },
- "overrides": []
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 0,
- "y": 42
- },
- "hiddenSeries": false,
- "id": 12,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": true,
- "steppedLine": false,
"targets": [
{
- "expr": "irate(bind_resolver_queries_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ view }} / {{ type }}",
- "refId": "A",
- "step": 10,
- "target": ""
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Resolver Queries",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr": "sum(rate(bind_incoming_queries_total{host=~\"$host\"}[$__rate_interval])) by (host) - sum(rate(bind_resolver_queries_total{host=~\"$host\"}[$__rate_interval])) by (host)",
+ "legendFormat": "__auto",
+ "range": true,
+ "refId": "A"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "title": "Authoritative Queries (by host)",
+ "type": "timeseries"
},
{
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "links": []
- },
- "overrides": []
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 8,
- "y": 42
- },
- "hiddenSeries": false,
- "id": 13,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "irate(bind_resolver_query_errors_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ view }} / {{ error }}",
- "refId": "A",
- "step": 10,
- "target": ""
- },
- {
- "expr": "irate(bind_resolver_query_edns0_errors_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ view }} / EDNS0",
- "refId": "B",
- "step": 10,
- "target": ""
- },
- {
- "expr": "irate(bind_resolver_query_retries_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ view }} / Retry",
- "refId": "C",
- "step": 10,
- "target": ""
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Query Errors",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
},
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "links": []
- },
- "overrides": []
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 8,
- "x": 16,
- "y": 42
- },
- "hiddenSeries": false,
- "id": 14,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": true,
- "steppedLine": false,
- "targets": [
- {
- "expr": "irate(bind_resolver_query_duration_seconds_bucket{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ view }} / {{ le }}",
- "refId": "A",
- "step": 10,
- "target": ""
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Query By Duration",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
"fieldConfig": {
"defaults": {
- "custom": {},
- "links": []
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 0,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "auto",
+ "spanNulls": false,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "percentunit"
},
"overrides": []
},
- "fill": 1,
- "fillGradient": 0,
"gridPos": {
- "h": 7,
+ "h": 8,
"w": 12,
"x": 0,
- "y": 49
- },
- "hiddenSeries": false,
- "id": 10,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
+ "y": 16
},
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
+ "id": 6,
"options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "bind_resolver_cache_rrsets{instance=~\"$instance\",job=\"$job\"}",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ view }} / {{ type }}",
- "refId": "A",
- "step": 10,
- "target": ""
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Resolver Cache RR Sets",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "legend": {
+ "calcs": [],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
},
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "tooltip": {
+ "mode": "single",
+ "sort": "none"
}
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_PROMETHEUS}",
- "fieldConfig": {
- "defaults": {
- "custom": {},
- "links": []
- },
- "overrides": []
- },
- "fill": 1,
- "fillGradient": 0,
- "gridPos": {
- "h": 7,
- "w": 12,
- "x": 12,
- "y": 49
- },
- "hiddenSeries": false,
- "id": 11,
- "legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": false
},
- "lines": true,
- "linewidth": 1,
- "links": [],
- "nullPointMode": "null",
- "options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.4.0",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": true,
- "steppedLine": false,
"targets": [
{
- "expr": "irate(bind_resolver_dnssec_validation_errors_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ view }} / ValErr",
- "refId": "A",
- "step": 10,
- "target": ""
- },
- {
- "expr": "irate(bind_resolver_dnssec_validation_success_total{instance=~\"$instance\",job=\"$job\"}[120s])",
- "interval": "",
- "intervalFactor": 2,
- "legendFormat": "{{ view }} / {{ result }}",
- "refId": "B",
- "step": 10,
- "target": ""
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "DNSSEC Validation",
- "tooltip": {
- "shared": true,
- "sort": 2,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "editorMode": "code",
+ "expr": "sum(rate(bind_query_errors_total{host=~\"$host\"}[$__rate_interval])) by (host,error) / ignoring(error) group_left sum(rate(bind_incoming_queries_total{host=~\"$host\"}[$__rate_interval])) by (host)",
+ "legendFormat": "{{host}}/{{error}}",
+ "range": true,
+ "refId": "A"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "title": "Query Error Ratio",
+ "type": "timeseries"
}
],
"refresh": "10s",
- "schemaVersion": 27,
+ "schemaVersion": 37,
"style": "dark",
"tags": [
"bind",
@@ -1683,8 +517,6 @@
"text": "localhost",
"value": "localhost"
},
- "description": null,
- "error": null,
"hide": 0,
"includeAll": false,
"label": "datasource",
@@ -1699,16 +531,46 @@
"type": "datasource"
},
{
- "allValue": null,
"current": {
- "selected": false,
- "text": "frontend",
- "value": "frontend"
+ "selected": true,
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
+ },
+ "definition": "label_values(bind_up, host)",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Host:",
+ "multi": false,
+ "name": "host",
+ "options": [],
+ "query": {
+ "query": "label_values(bind_up, host)",
+ "refId": "StandardVariableQuery"
+ },
+ "refresh": 1,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "current": {
+ "selected": true,
+ "text": "dns_9119",
+ "value": "dns_9119"
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
},
- "datasource": "${DS_PROMETHEUS}",
"definition": "label_values(bind_up, job)",
- "description": null,
- "error": null,
"hide": 0,
"includeAll": false,
"label": "Job",
@@ -1724,41 +586,35 @@
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
- "tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
- "allValue": null,
"current": {
"selected": true,
- "text": "latitanza.frontend.investici.org:9119",
- "value": "latitanza.frontend.investici.org:9119"
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "${DS_PROMETHEUS}"
},
- "datasource": "${DS_PROMETHEUS}",
- "definition": "label_values(bind_up, instance)",
- "description": null,
- "error": null,
+ "definition": "label_values(bind_resolver_cache_rrsets, view)",
"hide": 0,
"includeAll": true,
- "label": "Host:",
"multi": false,
- "name": "instance",
+ "name": "view",
"options": [],
"query": {
- "query": "label_values(bind_up, instance)",
+ "query": "label_values(bind_resolver_cache_rrsets, view)",
"refId": "StandardVariableQuery"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
+ "sort": 0,
+ "type": "query"
}
]
},
@@ -1794,5 +650,6 @@
"timezone": "browser",
"title": "DNS",
"uid": "XTqyUORMz",
- "version": 2
+ "version": 3,
+ "weekStart": ""
} \ No newline at end of file
diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json
index 7a98261..e73a59c 100644
--- a/float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json
+++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json
@@ -3,19 +3,27 @@
"list": [
{
"builtIn": 1,
- "datasource": "-- Grafana --",
+ "datasource": {
+ "type": "datasource",
+ "uid": "grafana"
+ },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
+ "target": {
+ "limit": 100,
+ "matchAny": false,
+ "tags": [],
+ "type": "dashboard"
+ },
"type": "dashboard"
}
]
},
"editable": true,
- "gnetId": null,
+ "fiscalYearStartMonth": 0,
"graphTooltip": 1,
- "iteration": 1622901747496,
"links": [
{
"asDropdown": true,
@@ -30,6 +38,7 @@
"type": "dashboards"
}
],
+ "liveNow": false,
"panels": [
{
"alerting": {},
@@ -40,15 +49,14 @@
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"decimals": 0,
"description": "See $cluster",
"editable": true,
"error": false,
- "fieldConfig": {
- "defaults": {},
- "overrides": []
- },
"fill": 1,
"fillGradient": 0,
"grid": {},
@@ -84,7 +92,7 @@
"alertThreshold": true
},
"percentage": false,
- "pluginVersion": "7.5.7",
+ "pluginVersion": "9.3.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -94,6 +102,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "100 - (avg by (cpu) (irate(node_cpu_seconds_total{mode=\"idle\", host=~\"$server\"}[$__rate_interval])) * 100)",
"format": "time_series",
"hide": true,
@@ -103,6 +115,10 @@
"step": 200
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "sum by (mode) (irate(node_cpu_seconds_total{mode!=\"idle\",host=~\"$server\"}[$__rate_interval])) / scalar(count(node_cpu_seconds_total{mode=\"idle\",host=~\"$server\"}))",
"format": "time_series",
"hide": false,
@@ -113,9 +129,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "CPU: utilization",
"tooltip": {
"msResolution": false,
@@ -125,9 +139,7 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
@@ -142,16 +154,12 @@
},
{
"format": "short",
- "label": null,
"logBase": 1,
- "max": null,
- "min": null,
"show": false
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
},
{
@@ -163,14 +171,13 @@
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"description": "",
"editable": true,
"error": false,
- "fieldConfig": {
- "defaults": {},
- "overrides": []
- },
"fill": 0,
"fillGradient": 0,
"grid": {},
@@ -199,7 +206,7 @@
"alertThreshold": true
},
"percentage": false,
- "pluginVersion": "7.5.7",
+ "pluginVersion": "9.3.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -222,6 +229,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_load1{host=~\"$server\"}",
"format": "time_series",
"intervalFactor": 4,
@@ -231,6 +242,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_load5{host=~\"$server\"}",
"format": "time_series",
"intervalFactor": 4,
@@ -240,6 +255,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_load15{host=~\"$server\"}",
"format": "time_series",
"intervalFactor": 4,
@@ -249,6 +268,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "sum(count(node_cpu_seconds_total{job=\"node\",mode=\"idle\",host=~\"$server\"}) by (cpu))",
"format": "time_series",
"intervalFactor": 4,
@@ -258,6 +281,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "sum(count(node_cpu_seconds_total{job=\"node\",mode=\"idle\",host=~\"$server\"}) by (cpu))/2",
"format": "time_series",
"hide": true,
@@ -269,9 +296,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "CPU: saturation (load avg)",
"tooltip": {
"msResolution": false,
@@ -281,9 +306,7 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
@@ -292,22 +315,17 @@
"format": "short",
"label": "",
"logBase": 1,
- "max": null,
"min": "0",
"show": true
},
{
"format": "short",
- "label": null,
"logBase": 1,
- "max": null,
- "min": null,
"show": true
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
},
{
@@ -322,13 +340,12 @@
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"editable": true,
"error": false,
- "fieldConfig": {
- "defaults": {},
- "overrides": []
- },
"fill": 1,
"fillGradient": 0,
"grid": {},
@@ -361,7 +378,7 @@
"alertThreshold": true
},
"percentage": false,
- "pluginVersion": "7.5.7",
+ "pluginVersion": "9.3.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -377,6 +394,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_memory_MemTotal_bytes{host=~\"$server\"}",
"format": "time_series",
"intervalFactor": 2,
@@ -387,6 +408,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_memory_Cached_bytes{host=~\"$server\"}",
"format": "time_series",
"hide": false,
@@ -398,6 +423,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_memory_MemTotal_bytes{host=~\"$server\"} - node_memory_Writeback_bytes{host=~\"$server\"} - node_memory_Cached_bytes{host=~\"$server\"} - node_memory_Buffers_bytes{host=~\"$server\"} - node_memory_MemFree_bytes{host=~\"$server\"}",
"format": "time_series",
"hide": false,
@@ -409,6 +438,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_memory_MemFree_bytes{host=~\"$server\"}",
"format": "time_series",
"hide": true,
@@ -421,9 +454,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "Memory: utilization",
"tooltip": {
"msResolution": false,
@@ -433,33 +464,25 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
- "label": null,
"logBase": 1,
- "max": null,
"min": "0",
"show": true
},
{
"format": "short",
- "label": null,
"logBase": 1,
- "max": null,
- "min": null,
"show": true
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
},
{
@@ -475,13 +498,12 @@
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"editable": true,
"error": false,
- "fieldConfig": {
- "defaults": {},
- "overrides": []
- },
"fill": 1,
"fillGradient": 0,
"grid": {},
@@ -514,7 +536,7 @@
"alertThreshold": true
},
"percentage": false,
- "pluginVersion": "7.5.7",
+ "pluginVersion": "9.3.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -530,6 +552,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_memory_MemTotal_bytes{host=~\"$server\"}",
"format": "time_series",
"hide": true,
@@ -541,6 +567,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "1 - (node_memory_SwapFree_bytes{host=~\"$server\"} / node_memory_SwapTotal_bytes{host=~\"$server\"})",
"format": "time_series",
"hide": true,
@@ -552,6 +582,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_memory_Dirty_bytes{host=~\"$server\"}",
"format": "time_series",
"hide": true,
@@ -563,6 +597,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_memory_WritebackTmp_bytes{host=~\"$server\"}",
"format": "time_series",
"hide": true,
@@ -574,6 +612,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_vmstat_pswpin{host=~\"$server\"}[$__rate_interval])",
"format": "time_series",
"hide": false,
@@ -585,6 +627,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_memory_Writeback_bytes{host=~\"$server\"}",
"format": "time_series",
"hide": true,
@@ -596,6 +642,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_vmstat_pswpout{host=~\"$server\"}[$__rate_interval])",
"format": "time_series",
"hide": false,
@@ -608,9 +658,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "Memory: saturation",
"tooltip": {
"msResolution": false,
@@ -620,24 +668,19 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "hertz",
- "label": null,
"logBase": 1,
- "max": null,
"min": "0",
"show": true
},
{
"format": "percentunit",
- "label": null,
"logBase": 1,
"max": "1",
"min": "0",
@@ -645,8 +688,7 @@
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
},
{
@@ -660,13 +702,12 @@
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"editable": true,
"error": false,
- "fieldConfig": {
- "defaults": {},
- "overrides": []
- },
"fill": 0,
"fillGradient": 0,
"grid": {},
@@ -695,7 +736,7 @@
"alertThreshold": true
},
"percentage": false,
- "pluginVersion": "7.5.7",
+ "pluginVersion": "9.3.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -710,6 +751,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "8*sum(irate(node_network_receive_bytes_total{host=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[$__rate_interval]) > 0)",
"format": "time_series",
"hide": false,
@@ -720,6 +765,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "8*sum(irate(node_network_transmit_bytes_total{host=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[$__rate_interval]) > 0)",
"format": "time_series",
"hide": false,
@@ -730,6 +779,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "8*sum(irate(node_network_transmit_packets_total{host=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[$__rate_interval]) > 0)",
"format": "time_series",
"hide": true,
@@ -741,6 +794,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "8*sum(irate(node_network_receive_packets_total{host=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[$__rate_interval]) > 0)",
"format": "time_series",
"hide": true,
@@ -752,9 +809,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "Network: utilization",
"tooltip": {
"msResolution": false,
@@ -764,18 +819,14 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bps",
- "label": null,
"logBase": 1,
- "max": null,
"min": "0",
"show": true
},
@@ -783,14 +834,12 @@
"format": "pps",
"label": "",
"logBase": 32,
- "max": null,
"min": "0",
"show": true
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
},
{
@@ -799,13 +848,12 @@
"bars": true,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"editable": true,
"error": false,
- "fieldConfig": {
- "defaults": {},
- "overrides": []
- },
"fill": 1,
"fillGradient": 0,
"grid": {},
@@ -834,7 +882,7 @@
"alertThreshold": true
},
"percentage": false,
- "pluginVersion": "7.5.7",
+ "pluginVersion": "9.3.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -844,6 +892,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "irate(node_network_transmit_drop_total{host=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[$__rate_interval]) + irate(node_network_receive_drop_total{host=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[$__rate_interval])",
"format": "time_series",
"hide": false,
@@ -855,6 +907,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "irate(node_network_transmit_errs_total{host=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[$__rate_interval]) + irate(node_network_receive_errs_total{host=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[$__rate_interval])",
"format": "time_series",
"hide": false,
@@ -867,9 +923,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "Network: errors",
"tooltip": {
"msResolution": false,
@@ -879,100 +933,141 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "pps",
- "label": null,
"logBase": 1,
- "max": null,
"min": "0",
"show": true
},
{
"format": "none",
- "label": null,
"logBase": 1,
- "max": null,
- "min": null,
"show": true
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
},
{
- "alerting": {},
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "localhost",
- "decimals": 0,
- "editable": true,
- "error": false,
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"fieldConfig": {
- "defaults": {},
- "overrides": []
+ "defaults": {
+ "color": {
+ "mode": "palette-classic"
+ },
+ "custom": {
+ "axisCenteredZero": false,
+ "axisColorMode": "text",
+ "axisLabel": "",
+ "axisPlacement": "auto",
+ "barAlignment": 0,
+ "drawStyle": "line",
+ "fillOpacity": 10,
+ "gradientMode": "none",
+ "hideFrom": {
+ "legend": false,
+ "tooltip": false,
+ "viz": false
+ },
+ "lineInterpolation": "linear",
+ "lineWidth": 1,
+ "pointSize": 5,
+ "scaleDistribution": {
+ "type": "linear"
+ },
+ "showPoints": "never",
+ "spanNulls": true,
+ "stacking": {
+ "group": "A",
+ "mode": "none"
+ },
+ "thresholdsStyle": {
+ "mode": "off"
+ }
+ },
+ "mappings": [],
+ "max": 1,
+ "min": 0,
+ "thresholds": {
+ "mode": "absolute",
+ "steps": [
+ {
+ "color": "green",
+ "value": null
+ },
+ {
+ "color": "red",
+ "value": 80
+ }
+ ]
+ },
+ "unit": "percentunit"
+ },
+ "overrides": [
+ {
+ "matcher": {
+ "id": "byName",
+ "options": "{host=\"172.17.0.1:9100\"}"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "ms"
+ }
+ ]
+ },
+ {
+ "matcher": {
+ "id": "byRegexp",
+ "options": "/time/"
+ },
+ "properties": [
+ {
+ "id": "unit",
+ "value": "ms"
+ }
+ ]
+ }
+ ]
},
- "fill": 1,
- "fillGradient": 0,
- "grid": {},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
"y": 21
},
- "hiddenSeries": false,
"id": 6,
- "legend": {
- "avg": false,
- "current": true,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
"links": [],
- "nullPointMode": "connected",
"options": {
- "alertThreshold": true
- },
- "percentage": false,
- "pluginVersion": "7.5.7",
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [
- {
- "alias": "read",
- "yaxis": 1
- },
- {
- "alias": "{host=\"172.17.0.1:9100\"}",
- "yaxis": 2
- },
- {
- "alias": "/time/",
- "yaxis": 2
+ "legend": {
+ "calcs": [
+ "lastNotNull"
+ ],
+ "displayMode": "list",
+ "placement": "bottom",
+ "showLegend": true
+ },
+ "tooltip": {
+ "mode": "multi",
+ "sort": "desc"
}
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
+ },
+ "pluginVersion": "9.3.2",
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "irate(node_disk_reads_completed_total{host=~\"$server.*\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])",
"format": "time_series",
"hide": true,
@@ -983,6 +1078,10 @@
"step": 1200
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "irate(node_disk_writes_completed_total{host=~\"$server.*\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])",
"format": "time_series",
"hide": true,
@@ -993,6 +1092,10 @@
"step": 1200
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "irate(node_disk_io_time_seconds_total{host=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])irate(node_disk_io_time_seconds_total{host=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])",
"format": "time_series",
"hide": true,
@@ -1003,55 +1106,34 @@
"step": 20
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "irate(node_disk_io_time_seconds_total{host=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{device}}",
"refId": "C",
"step": 30
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeRegions": [],
- "timeShift": null,
- "title": "Disk: utilization",
- "tooltip": {
- "msResolution": false,
- "shared": true,
- "sort": 2,
- "value_type": "cumulative"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "percentunit",
- "label": "",
- "logBase": 1,
- "max": "1",
- "min": "0",
- "show": true
},
{
- "format": "ms",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
+ "editorMode": "code",
+ "expr": "sum(node_md_state{host=~\"$server\",state!=\"active\"}) by (device,state) > 0",
+ "format": "time_series",
+ "hide": false,
+ "interval": "",
+ "legendFormat": "{{device}}: {{state}}",
+ "range": true,
+ "refId": "E"
}
],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
+ "title": "Disk: utilization",
+ "type": "timeseries"
},
{
"alerting": {},
@@ -1059,13 +1141,12 @@
"bars": true,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"editable": true,
"error": false,
- "fieldConfig": {
- "defaults": {},
- "overrides": []
- },
"fill": 1,
"fillGradient": 0,
"grid": {},
@@ -1094,7 +1175,7 @@
"alertThreshold": true
},
"percentage": false,
- "pluginVersion": "7.5.7",
+ "pluginVersion": "9.3.2",
"pointradius": 1,
"points": false,
"renderer": "flot",
@@ -1117,6 +1198,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"exemplar": true,
"expr": "node_disk_io_now{host=~\"$server.*\"}",
"format": "time_series",
@@ -1129,6 +1214,10 @@
"step": 60
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "irate(node_disk_io_time_seconds_total{host=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])",
"format": "time_series",
"hide": true,
@@ -1139,6 +1228,10 @@
"step": 1200
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_md_is_active{host=~\"$server\"} < 1",
"format": "time_series",
"hide": false,
@@ -1150,9 +1243,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "Disk: saturation",
"tooltip": {
"msResolution": false,
@@ -1162,36 +1253,27 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:92",
- "decimals": null,
"format": "short",
- "label": null,
"logBase": 1,
- "max": null,
"min": "0",
"show": true
},
{
"$$hashKey": "object:93",
"format": "ms",
- "label": null,
"logBase": 1,
- "max": null,
- "min": null,
"show": true
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
},
{
@@ -1199,10 +1281,9 @@
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
- "fieldConfig": {
- "defaults": {},
- "overrides": []
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
},
"fill": 1,
"fillGradient": 0,
@@ -1231,7 +1312,7 @@
"alertThreshold": true
},
"percentage": false,
- "pluginVersion": "7.5.7",
+ "pluginVersion": "9.3.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -1241,6 +1322,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_sockstat_TCP_tw{host=~\"$server\"}",
"format": "time_series",
"intervalFactor": 2,
@@ -1250,6 +1335,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_sockstat_UDP_inuse{host=~\"$server\"}",
"format": "time_series",
"intervalFactor": 2,
@@ -1259,6 +1348,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_sockstat_TCP_inuse{host=~\"$server\"}",
"format": "time_series",
"intervalFactor": 2,
@@ -1268,6 +1361,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_nf_conntrack_entries{host=~\"$server.*\"} / node_nf_conntrack_entries_limit{host=~\"$server.*\"}",
"format": "time_series",
"hide": true,
@@ -1278,9 +1375,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "Socket: utilization",
"tooltip": {
"shared": true,
@@ -1289,33 +1384,24 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
- "label": null,
"logBase": 1,
- "max": null,
- "min": null,
"show": true
},
{
"format": "short",
- "label": null,
"logBase": 1,
- "max": null,
- "min": null,
"show": true
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
},
{
@@ -1323,10 +1409,9 @@
"bars": true,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
- "fieldConfig": {
- "defaults": {},
- "overrides": []
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
},
"fill": 1,
"fillGradient": 0,
@@ -1355,7 +1440,7 @@
"alertThreshold": true
},
"percentage": false,
- "pluginVersion": "7.5.7",
+ "pluginVersion": "9.3.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
@@ -1365,6 +1450,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_netstat_Tcp_InErrs{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
@@ -1374,6 +1463,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_netstat_Tcp_AttemptFails{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
@@ -1383,6 +1476,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_netstat_Tcp_EstabResets{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"hide": true,
@@ -1393,6 +1490,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_netstat_Udp_RcvbufErrors{host=~\"$server.*\"}[$__rate_interval]) + rate(node_netstat_Udp_SndbufErrors{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
@@ -1403,6 +1504,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_netstat_Udp_InErrors{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 2,
@@ -1412,6 +1517,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_netstat_TcpExt_RcvPruned{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"hide": true,
@@ -1422,6 +1531,10 @@
"step": 30
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_netstat_TcpExt_SyncookiesFailed{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"hide": true,
@@ -1432,6 +1545,10 @@
"step": 30
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_netstat_TcpExt_ListenDrops{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"hide": true,
@@ -1442,6 +1559,10 @@
"step": 30
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_netstat_Icmp_InErrors{host=~\"$server.*\"}[$__rate_interval]) + rate(node_netstat_Icmp_OutErrors{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"hide": false,
@@ -1452,6 +1573,10 @@
"step": 30
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_netstat_Tcp_OutRsts{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"hide": true,
@@ -1464,9 +1589,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "Socket: errors",
"tooltip": {
"shared": true,
@@ -1475,9 +1598,7 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
@@ -1486,22 +1607,17 @@
"format": "hertz",
"label": "",
"logBase": 1,
- "max": null,
"min": "0",
"show": true
},
{
"format": "short",
- "label": null,
"logBase": 1,
- "max": null,
- "min": null,
"show": true
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
},
{
@@ -1509,10 +1625,9 @@
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
- "fieldConfig": {
- "defaults": {},
- "overrides": []
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
},
"fill": 1,
"fillGradient": 0,
@@ -1551,6 +1666,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_filefd_allocated{host=~\"$server.*\"} / node_filefd_maximum{host=~\"$server.*\"}",
"format": "time_series",
"hide": false,
@@ -1562,6 +1681,10 @@
"target": "isNonNull()"
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_procs_running{host=~\"$server.*\"}",
"format": "time_series",
"hide": false,
@@ -1573,6 +1696,10 @@
"target": "isNonNull()"
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_nf_conntrack_entries{host=~\"$server.*\"} / node_nf_conntrack_entries_limit{host=~\"$server.*\"}",
"format": "time_series",
"hide": false,
@@ -1584,6 +1711,10 @@
"target": "isNonNull()"
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_",
"format": "time_series",
"hide": false,
@@ -1596,9 +1727,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "misc: utilization",
"tooltip": {
"shared": true,
@@ -1607,34 +1736,26 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
"yaxes": [
{
- "decimals": null,
"format": "short",
"label": "",
"logBase": 1024,
- "max": null,
"min": "0",
"show": true
},
{
"format": "short",
- "label": null,
"logBase": 1,
- "max": null,
- "min": null,
"show": true
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
},
{
@@ -1642,10 +1763,9 @@
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
- "fieldConfig": {
- "defaults": {},
- "overrides": []
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
},
"fill": 1,
"fillGradient": 0,
@@ -1684,6 +1804,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_procs_blocked{host=~\"$server.*\"}",
"format": "time_series",
"hide": false,
@@ -1695,6 +1819,10 @@
"target": "isNonNull()"
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_entropy_available_bits{host=~\"$server.*\"}",
"format": "time_series",
"intervalFactor": 2,
@@ -1705,6 +1833,10 @@
"target": "isNonNull()"
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_forks_total{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"hide": true,
@@ -1716,6 +1848,10 @@
"target": "isNonNull()"
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_intr_total{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"hide": true,
@@ -1728,9 +1864,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "misc: saturation",
"tooltip": {
"shared": true,
@@ -1739,33 +1873,25 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
- "label": null,
"logBase": 1024,
- "max": null,
"min": "0",
"show": true
},
{
"format": "short",
- "label": null,
"logBase": 1,
- "max": null,
- "min": null,
"show": true
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
},
{
@@ -1773,10 +1899,9 @@
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
- "fieldConfig": {
- "defaults": {},
- "overrides": []
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
},
"fill": 1,
"fillGradient": 0,
@@ -1815,6 +1940,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_edac_uncorrectable_errors_total{host=\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"hide": false,
@@ -1826,6 +1955,10 @@
"target": "isNonNull()"
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_textfile_scrape_error{host=\"$server.*\"}",
"format": "time_series",
"hide": false,
@@ -1837,6 +1970,10 @@
"target": "isNonNull()"
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_intr_total{host=~\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"hide": true,
@@ -1848,6 +1985,10 @@
"target": "isNonNull()"
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "rate(node_edac_correctable_errors_total{host=\"$server.*\"}[$__rate_interval])",
"format": "time_series",
"hide": false,
@@ -1860,9 +2001,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "misc: errors",
"tooltip": {
"shared": true,
@@ -1871,33 +2010,25 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
- "label": null,
"logBase": 1024,
- "max": null,
"min": "0",
"show": true
},
{
"format": "short",
- "label": null,
"logBase": 1,
- "max": null,
- "min": null,
"show": true
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
},
{
@@ -1906,13 +2037,12 @@
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "localhost",
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"editable": true,
"error": false,
- "fieldConfig": {
- "defaults": {},
- "overrides": []
- },
"fill": 0,
"fillGradient": 0,
"grid": {},
@@ -1952,6 +2082,10 @@
"steppedLine": false,
"targets": [
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "1- (node_filesystem_avail_bytes{host=~\"$server\",fstype!~\"(tmpfs|rpc_pipefs|debugfs)\"} / node_filesystem_size_bytes{host=~\"$server\"})",
"format": "time_series",
"hide": false,
@@ -1962,6 +2096,10 @@
"target": ""
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "1- (node_filesystem_files_free{host=~\"$server\",fstype!~\"(tmpfs|rpc_pipefs|debugfs)\"} / node_filesystem_files{host=~\"$server\"})",
"format": "time_series",
"hide": false,
@@ -1971,6 +2109,10 @@
"step": 60
},
{
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
+ },
"expr": "node_filesystem_size_bytes",
"format": "time_series",
"hide": true,
@@ -1982,9 +2124,7 @@
}
],
"thresholds": [],
- "timeFrom": null,
"timeRegions": [],
- "timeShift": null,
"title": "filesystem: utilization",
"tooltip": {
"msResolution": false,
@@ -1994,38 +2134,31 @@
},
"type": "graph",
"xaxis": {
- "buckets": null,
"mode": "time",
- "name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percentunit",
- "label": null,
"logBase": 1,
"max": 1,
- "min": null,
"show": true
},
{
"format": "percentunit",
- "label": null,
"logBase": 1,
"max": 1,
- "min": null,
"show": true
}
],
"yaxis": {
- "align": false,
- "alignLevel": null
+ "align": false
}
}
],
"refresh": "5m",
- "schemaVersion": 27,
+ "schemaVersion": 37,
"style": "dark",
"tags": [
"prometheus",
@@ -2034,19 +2167,18 @@
"templating": {
"list": [
{
- "allValue": null,
"current": {
"selected": true,
- "text": "indolenza",
- "value": "indolenza"
+ "text": "assenza",
+ "value": "assenza"
+ },
+ "datasource": {
+ "type": "prometheus",
+ "uid": "P49960DE5880E8C68"
},
- "datasource": "localhost",
"definition": "",
- "description": null,
- "error": null,
"hide": 0,
"includeAll": false,
- "label": null,
"multi": false,
"name": "server",
"options": [],
@@ -2058,9 +2190,6 @@
"regex": "",
"skipUrlSync": false,
"sort": 1,
- "tagValuesQuery": null,
- "tags": [],
- "tagsQuery": null,
"type": "query",
"useTags": false
}
@@ -2098,5 +2227,6 @@
"timezone": "utc",
"title": "Host overview",
"uid": "W8eE_Qgik",
- "version": 12
-}
+ "version": 13,
+ "weekStart": ""
+} \ No newline at end of file
diff --git a/float/roles/float-infra-prometheus/templates/prometheus.yml.j2 b/float/roles/float-infra-prometheus/templates/prometheus.yml.j2
index 0a6caa3..f0f23d2 100644
--- a/float/roles/float-infra-prometheus/templates/prometheus.yml.j2
+++ b/float/roles/float-infra-prometheus/templates/prometheus.yml.j2
@@ -44,6 +44,9 @@
{% macro job_service_config(service_name, target_config) %}
- job_name: "{{ service_name }}_{{ target_config.port }}"
scheme: "{{ target_config.get('scheme', 'https') }}"
+{% if target_config.get('scrape_interval') %}
+ scrape_interval: "{{ target_config['scrape_interval'] }}"
+{% endif %}
{% if target_config.get('metrics_path') %}
metrics_path: "{{ target_config['metrics_path'] }}"
{% endif %}
@@ -101,6 +104,7 @@ scrape_configs:
{# Blackbox probes #}
+{% set probe_scrape_interval = prometheus_probe_scrape_interval | default(prometheus_scrape_interval) %}
{% for prober_host in services['prometheus'].hosts|sort %}
{% set prober_idx = loop.index %}
@@ -109,6 +113,7 @@ scrape_configs:
{% for target_config in service.get('monitoring_endpoints', []) %}
- job_name: "prober_health_{{ service_name | replace('-', '_') }}_{{ prober_idx }}_{{ loop.index }}"
metrics_path: "/probe"
+ scrape_interval: "{{ probe_scrape_interval }}"
params:
module:
- http_health_{{ target_config.healthcheck_http_method | default('HEAD') | lower }}
@@ -135,6 +140,7 @@ scrape_configs:
probe: health
probeset: health
prober_float_service: prometheus
+ prober_float_endpoint: prober
float_service: "{{ service_name }}"
float_job: "{{ service_name }}_{{ target_config.port }}"
{% endfor %}
@@ -142,6 +148,7 @@ scrape_configs:
- job_name: "prober_ping_{{ loop.index }}"
metrics_path: "/probe"
+ scrape_interval: "{{ probe_scrape_interval }}"
params:
module:
- ping
@@ -168,9 +175,11 @@ scrape_configs:
probe: ping
probeset: base
prober_float_service: prometheus
+ prober_float_endpoint: prober
- job_name: "prober_https_{{ prober_idx }}"
metrics_path: "/probe"
+ scrape_interval: "{{ probe_scrape_interval }}"
params:
module:
- http_base
@@ -197,9 +206,11 @@ scrape_configs:
probe: https
probeset: base
prober_float_service: prometheus
+ prober_float_endpoint: prober
- job_name: "prober_dns_{{ prober_idx }}"
metrics_path: "/probe"
+ scrape_interval: "{{ probe_scrape_interval }}"
params:
module: [dns_toplevel]
relabel_configs:
@@ -225,6 +236,7 @@ scrape_configs:
probe: dns
probeset: base
prober_float_service: prometheus
+ prober_float_endpoint: prober
{% endfor %}
@@ -233,6 +245,7 @@ scrape_configs:
{% for prober_host in services[p.service].hosts | sort %}
- job_name: "prober_{{ p.name }}_{{ loop.index }}"
metrics_path: "/probe"
+ scrape_interval: "{{ p.scrape_interval | default(probe_scrape_interval) }}"
params:
module:
- {{ p.module | default(p.name) }}
@@ -267,6 +280,7 @@ scrape_configs:
probe: {{ p.name }}
probeset: custom
prober_float_service: {{ p.service }}
+ prober_float_endpoint: {{ float_http_endpoints_by_port[p.port] | default(p.service) }}
{% if p.service is defined %}
{% for k, v in services[p.service].prober_labels | default({}) | dictsort %}
{{ k }}: {{ v }}
@@ -280,6 +294,9 @@ scrape_configs:
- job_name: "{{ target.name }}"
scheme: "{{ target.scheme | default('http') }}"
metrics_path: "{{ target.metrics_path | default('/metrics') }}"
+{% if target.scrape_interval is defined %}
+ scrape_interval: "{{ target.scrape_interval }}"
+{% endif %}
static_configs:
- targets: {{ target.targets | to_json }}
labels:
@@ -320,6 +337,10 @@ scrape_configs:
- targets: {{ prometheus_federated_targets | to_json }}
{% endif %}
+{% if prometheus_extra_scrape_config is defined %}
+{{ prometheus_extra_scrape_config }}
+{% endif %}
+
rule_files:
- /etc/prometheus/rules/*.yml
diff --git a/float/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml b/float/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml
index 7490704..eb04ce1 100644
--- a/float/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml
+++ b/float/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml
@@ -95,7 +95,7 @@ groups:
Probe {{ $labels.probe }} ({{ $labels.host }}) is failing for target {{ $labels.host }}
(success ratio {{ $value }}).
- Failed probe logs: https://{{ $labels.prober_float_service }}.[[ domain_public[0] ]]/
+ Failed probe logs: https://{{ $labels.prober_float_endpoint }}.[[ domain_public[0] ]]/
runbook: '[[ alert_runbook_fmt | format("ProbeFailure") ]]'
- alert: ProbeFailure
@@ -110,7 +110,7 @@ groups:
Probe {{ $labels.probe }} ({{ $labels.zone }}) is failing globally
(success ratio {{ $value }}).
- Failed probe logs: https://{{ $labels.prober_float_service }}.[[ domain_public[0] ]]/
+ Failed probe logs: https://{{ $labels.prober_float_endpoint }}.[[ domain_public[0] ]]/
runbook: '[[ alert_runbook_fmt | format("ProbeFailure") ]]'
{# Specific ProbeFailure alerts for each custom prober_service->timeout pair #}
@@ -128,7 +128,7 @@ groups:
Probe {{ $labels.probe }} ({{ $labels.host }}) is failing for target {{ $labels.host }}
(success ratio {{ $value }}).
- Failed probe logs: https://{{ $labels.prober_float_service }}.[[ domain_public[0] ]]/
+ Failed probe logs: https://{{ $labels.prober_float_endpoint }}.[[ domain_public[0] ]]/
runbook: '[[ alert_runbook_fmt | format("ProbeFailure") ]]'
- alert: ProbeFailure
@@ -143,7 +143,7 @@ groups:
Probe {{ $labels.probe }} ({{ $labels.zone }}) is failing globally
(success ratio {{ $value }}).
- Failed probe logs: https://{{ $labels.prober_float_service }}.[[ domain_public[0] ]]/
+ Failed probe logs: https://{{ $labels.prober_float_endpoint }}.[[ domain_public[0] ]]/
runbook: '[[ alert_runbook_fmt | format("ProbeFailure") ]]'
{% endfor %}
diff --git a/float/roles/float-infra-prometheus/templates/rules/alerts_disk.conf.yml b/float/roles/float-infra-prometheus/templates/rules/alerts_disk.conf.yml
index f01ec0c..18a262f 100644
--- a/float/roles/float-infra-prometheus/templates/rules/alerts_disk.conf.yml
+++ b/float/roles/float-infra-prometheus/templates/rules/alerts_disk.conf.yml
@@ -3,7 +3,7 @@ groups:
rules:
- alert: DiskWillFillIn4Hours
expr: (predict_linear(node_filesystem_avail_bytes[1h], 4 * 3600) < 0) and (node_filesystem_avail_bytes / node_filesystem_size_bytes < 0.6)
- for: 30m
+ for: 1h
labels:
severity: page
scope: host
diff --git a/float/roles/float-infra-prometheus/templates/rules/alerts_nginx.conf.yml b/float/roles/float-infra-prometheus/templates/rules/alerts_nginx.conf.yml
deleted file mode 100644
index 5c63354..0000000
--- a/float/roles/float-infra-prometheus/templates/rules/alerts_nginx.conf.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-groups:
-- name: roles/float-infra-prometheus/templates/rules/alerts_nginx.conf
- rules:
-
- - alert: HTTPErrorRatioHigh
- expr: (global:nginx_http_requests_errs:ratio > 0.2 and global:nginx_http_requests_total:rate5m > 0.1)
- for: 5m
- labels:
- scope: global
- service: nginx
- severity: page
- annotations:
- summary: 'High HTTP error ratio for {{$labels.vhost}} globally'
- description: 'We are serving lots of 5xx errors for {{$labels.vhost}} on all frontends.'
- runbook: '[[ alert_runbook_fmt | format("HTTPErrorRatioHigh") ]]'
-
- - alert: HTTPErrorRatioHigh
- expr: (host:nginx_http_requests_errs:ratio > 0.2 and host:nginx_http_requests_total:rate5m > 0.1)
- for: 10m
- labels:
- scope: host
- service: nginx
- severity: page
- annotations:
- summary: 'High HTTP error ratio for {{$labels.vhost}} on {{$labels.host}}'
- description: 'We are serving lots of 5xx errors for {{$labels.vhost}} on {{$labels.host}}.'
- runbook: '[[ alert_runbook_fmt | format("HTTPErrorRatioHigh") ]]'
-
diff --git a/float/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml b/float/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml
index c7df069..bb20fb7 100644
--- a/float/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml
+++ b/float/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml
@@ -31,6 +31,10 @@ groups:
- record: probe:probe_success:ratio
expr: probe:probe_success:sum / probe:probe_success:count
+ # Separate SLI-oriented metric that looks at success across prober hosts.
+ - record: probe:probe_success:max
+ expr: max(probe_success) without (job,instance,prober_host,host)
+
# Special metric for the ping probe.
# The 'bool' qualifier makes the greater-than operation not act as a filter.
- record: host_reachable
diff --git a/float/roles/float-infra-prometheus/templates/rules/rules_net.conf.yml b/float/roles/float-infra-prometheus/templates/rules/rules_net.conf.yml
index 2041c9a..01e7767 100644
--- a/float/roles/float-infra-prometheus/templates/rules/rules_net.conf.yml
+++ b/float/roles/float-infra-prometheus/templates/rules/rules_net.conf.yml
@@ -29,6 +29,10 @@ groups:
expr: sum(rate(node_network_transmit_bytes_total{device=~"(eth|e[nl][op]).*"}[5m])) without (device)
- record: instance:public_network_receive_bytes_total:rate5m
expr: sum(rate(node_network_receive_bytes_total{device=~"(eth|e[nl][op]).*"}[5m])) without (device)
+ - record: instance:internal_network_transmit_bytes_total:rate5m
+ expr: sum(rate(node_network_transmit_bytes_total{device=~"vpn.*"}[5m])) without (device)
+ - record: instance:internal_network_receive_bytes_total:rate5m
+ expr: sum(rate(node_network_receive_bytes_total{device=~"vpn.*"}[5m])) without (device)
- record: global:public_network_transmit_bytes_total:rate5m
expr: sum(instance:public_network_transmit_bytes_total:rate5m) without (instance, host)
- record: global:public_network_receive_bytes_total:rate5m
diff --git a/float/roles/float-infra-prometheus/templates/rules/rules_nginx.conf.yml b/float/roles/float-infra-prometheus/templates/rules/rules_nginx.conf.yml
index 5eaee6b..60b59bb 100644
--- a/float/roles/float-infra-prometheus/templates/rules/rules_nginx.conf.yml
+++ b/float/roles/float-infra-prometheus/templates/rules/rules_nginx.conf.yml
@@ -9,12 +9,14 @@ groups:
expr: (host:nginx_http_requests_errs:rate5m / host:nginx_http_requests_total:rate5m)
- record: global:nginx_http_requests_total:rate5m
expr: sum(rate(nginx_http_requests[5m])) by (vhost)
+ - record: global:nginx_http_requests_200:rate5m
+ expr: sum(rate(nginx_http_requests{code="200"}[5m])) by (vhost)
- record: global:nginx_http_requests_errs:rate5m
expr: sum(rate(nginx_http_requests{code=~"5.*"}[5m])) by (vhost)
- record: global:nginx_http_requests_errs:ratio
expr: (global:nginx_http_requests_errs:rate5m / global:nginx_http_requests_total:rate5m)
- record: global:nginx_http_cached_requests:ratio
- expr: clamp_max(sum(rate(nginx_http_requests_cache[5m])) by (vhost, cache_status) / ignoring (cache_status) group_left global:nginx_http_requests_total:rate5m, 1)
+ expr: clamp_max(sum(rate(nginx_http_requests_cache[5m])) by (vhost, cache_status) / ignoring (cache_status) group_left global:nginx_http_requests_200:rate5m, 1)
- name: http_requests_ms_histogram
rules:
diff --git a/float/roles/float-infra-service-dashboard/handlers/main.yml b/float/roles/float-infra-service-dashboard/handlers/main.yml
new file mode 100644
index 0000000..3865720
--- /dev/null
+++ b/float/roles/float-infra-service-dashboard/handlers/main.yml
@@ -0,0 +1,6 @@
+---
+
+- listen: reload service-dashboard
+ systemd:
+ name: docker-service-dashboard-http.service
+ state: restarted
diff --git a/float/roles/float-infra-admin-dashboard/tasks/main.yml b/float/roles/float-infra-service-dashboard/tasks/main.yml
index 219108d..17724f9 100644
--- a/float/roles/float-infra-admin-dashboard/tasks/main.yml
+++ b/float/roles/float-infra-service-dashboard/tasks/main.yml
@@ -9,9 +9,10 @@
copy:
dest: "/etc/float/{{ item.name }}"
content: "{{ item.data }}"
- group: docker-admin-dashboard
+ group: docker-service-dashboard
mode: 0640
- notify: "reload admin-dashboard"
+ notify: "reload service-dashboard"
+ no_log: true
with_items:
- name: services.yml
data: "{{ services | to_nice_yaml }}"
diff --git a/float/roles/float-infra-sso-server/defaults/main.yml b/float/roles/float-infra-sso-server/defaults/main.yml
index c0f325c..28c5317 100644
--- a/float/roles/float-infra-sso-server/defaults/main.yml
+++ b/float/roles/float-infra-sso-server/defaults/main.yml
@@ -41,3 +41,7 @@ sso_service_ttls:
# List of allowed Origins for CORS (URLs without path component).
# These are not regular expressions, but you can use wildcards (*).
sso_allowed_cors_origins: []
+
+# When are users asked to authenticate again? (seconds)
+sso_auth_session_lifetime: 43200
+
diff --git a/float/roles/float-infra-sso-server/handlers/main.yml b/float/roles/float-infra-sso-server/handlers/main.yml
index 07ab764..8a883a8 100644
--- a/float/roles/float-infra-sso-server/handlers/main.yml
+++ b/float/roles/float-infra-sso-server/handlers/main.yml
@@ -5,6 +5,8 @@
- name: restart user-meta-server
systemd: name=user-meta-server.service state=restarted
+ # Allow failure when testing backups: the unit can't start until later.
+ ignore_errors: "{{ testing | default(True) }}"
- name: restart auth-server
systemd: name=auth-server.service state=restarted
diff --git a/float/roles/float-infra-sso-server/meta/main.yml b/float/roles/float-infra-sso-server/meta/main.yml
index 1e99df1..00e1770 100644
--- a/float/roles/float-infra-sso-server/meta/main.yml
+++ b/float/roles/float-infra-sso-server/meta/main.yml
@@ -4,3 +4,5 @@ dependencies:
- role: float-util-geoip-dataset
vars:
geoip_dataset: 'Country'
+ - role: float-util-tor-exits-dataset
+
diff --git a/float/roles/float-infra-sso-server/templates/server.yml.j2 b/float/roles/float-infra-sso-server/templates/server.yml.j2
index d6360c4..1d7be5e 100644
--- a/float/roles/float-infra-sso-server/templates/server.yml.j2
+++ b/float/roles/float-infra-sso-server/templates/server.yml.j2
@@ -28,7 +28,7 @@ allowed_services:
allowed_cors_origins: {{ sso_allowed_cors_origins | to_json }}
allowed_exchanges: {{ sso_allowed_exchanges | to_json }}
service_ttls: {{ sso_service_ttls | to_json }}
-auth_session_lifetime: 43200
+auth_session_lifetime: {{ sso_auth_session_lifetime }}
session_auth_key: "{{ sso_session_auth_secret }}"
session_enc_key: "{{ sso_session_enc_secret }}"
csrf_secret: "{{ sso_csrf_secret }}"
@@ -47,8 +47,14 @@ keystore_enable_groups:
url_path_prefix: "{{ sso_server_url_path_prefix }}"
account_recovery_url: "{{ sso_server_account_recovery_url | default('') }}"
default_signed_in_redirect: "{{ sso_server_default_signed_in_redirect | default('') }}"
+cookie_same_site_mode: "{{ sso_cookie_same_site_mode | default('strict') }}"
device_manager:
auth_key: "{{ sso_device_manager_auth_secret }}"
+ zone_maps:
+ - type: ipset
+ path: /var/lib/tor-exits/exit-nodes
+ value: Tor
+ - type: geoip
http_server:
enable_compression: true
request_timeout: 10
@@ -72,3 +78,9 @@ site_name: {{ sso_site_title }}
{% if sso_favicon is defined %}
site_favicon: {{ sso_favicon }}
{% endif %}
+{% if sso_login_username_label is defined %}
+login_username_label: {{ sso_login_username_label }}
+{% endif %}
+{% if sso_login_again_url is defined %}
+login_again_url: "{{ sso_login_again_url }}"
+{% endif %}
diff --git a/float/roles/float-util-credentials/README.md b/float/roles/float-util-credentials/README.md
index de7a8aa..88f1695 100644
--- a/float/roles/float-util-credentials/README.md
+++ b/float/roles/float-util-credentials/README.md
@@ -12,7 +12,7 @@ on the Ansible host.
X509 credentials are stored in /etc/credentials/x509 under directories
named after the services. Every service directory contains a copy of
the public CA certificate, so it can be bind-mounted in a container
-easily.
+easily. There will be separate client and server certificates.
Private keys have mode 440, are owned by root and by a dedicated group
named *service*-credentials. When the service is actually installed,
@@ -24,3 +24,25 @@ list of entries specifying the desired credentials. This is already
done once system-wide by the *float-credentials* role with the
credentials automagically derived from the service definitions by
*float*.
+
+## Multiple PKIs
+
+The role supports credentials from different PKI CAs, each identified
+by a separate *tag*, with *x509* being the tag of the default internal
+float CA.
+
+Additional PKIs are expected to have their CA credentials in the
+*credentials_dir*/*tag* local directory, and will have their
+certificates installed below /etc/credentials/*tag*.
+
+There are two ways, when invoking this role, to specify that a
+different CA from the default should be used:
+
+* By setting the *ca_tag* attribute in the *credentials* map of any of
+ the values passed in the *credentials* variable (yes, that's
+ credentials nested twice). This is how float passes the
+ *service_credentials* metadata, so you can just set *ca_tag* there.
+* By setting the *ca_tag* variable in Ansible when including this
+ role, if you are creating certificates manually rather than relying
+ on *service_credentials*.
+
diff --git a/float/roles/float-util-credentials/tasks/main.yml b/float/roles/float-util-credentials/tasks/main.yml
index 907f0ff..b7cf1fe 100644
--- a/float/roles/float-util-credentials/tasks/main.yml
+++ b/float/roles/float-util-credentials/tasks/main.yml
@@ -16,8 +16,10 @@
changed_when: false
register: all_systemd_units
-# Get the credential names from the list of certs.
- set_fact:
+ # Default CA name.
+ default_ca_tag: "{{ ca_tag | default('x509') }}"
+ # Get the credential names from the list of certs.
credentials_names: "{{ credentials | map(attribute='credentials') | map(attribute='name') | unique | list }}"
- name: "Create service credentials group"
@@ -28,18 +30,18 @@
- name: "Create service credentials dirs"
file:
- path: "/etc/credentials/x509/{{ item }}"
+ path: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}"
state: directory
- loop: "{{ credentials_names }}"
+ loop: "{{ credentials }}"
- name: Copy CA
copy:
- src: "{{ credentials_dir }}/x509/ca.pem"
- dest: "/etc/credentials/x509/{{ item }}/ca.pem"
+ src: "{{ local_ca_path | default(credentials_dir + '/' + (item.credentials.ca_tag | default(default_ca_tag))) }}/ca.pem"
+ dest: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}/ca.pem"
owner: root
group: root
mode: 0644
- loop: "{{ credentials_names }}"
+ loop: "{{ credentials }}"
# Create and sign all certificates in a series of loops (with some
# unfortunately complex change-detection logic).
@@ -47,7 +49,7 @@
block:
- file:
- path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}"
+ path: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}/{{ item.mode }}"
state: directory
loop: "{{ credentials }}"
@@ -57,9 +59,9 @@
domain: "{{ domain }}"
mode: "{{ item.mode }}"
params: "{{ item.x509_params|default({}) }}"
- private_key_path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem"
- cert_path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/cert.pem"
- ca_cert_path: "/etc/credentials/x509/{{ item.credentials.name }}/ca.pem"
+ private_key_path: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem"
+ cert_path: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}/{{ item.mode }}/cert.pem"
+ ca_cert_path: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}/ca.pem"
check: true
loop: "{{ credentials }}"
check_mode: no
@@ -72,7 +74,7 @@
domain: "{{ domain }}"
mode: "{{ item.0.mode }}"
params: "{{ item.0.x509_params|default({}) }}"
- private_key_path: "/etc/credentials/x509/{{ item.0.credentials.name }}/{{ item.0.mode }}/private_key.pem"
+ private_key_path: "/etc/credentials/{{ item.0.credentials.ca_tag | default(default_ca_tag) }}/{{ item.0.credentials.name }}/{{ item.0.mode }}/private_key.pem"
check: false
when: "item.1.changed"
loop: "{{ credentials | zip(x509_should_update.results) | list }}"
@@ -82,15 +84,15 @@
x509_sign:
csr: "{{ item.1.csr }}"
mode: "{{ item.0.mode }}"
- ca_cert_path: "{{ credentials_dir }}/x509/ca.pem"
- ca_key_path: "{{ credentials_dir }}/x509/ca_private_key.pem"
+ ca_cert_path: "{{ local_ca_path | default(credentials_dir + '/' + (item.0.credentials.ca_tag | default(default_ca_tag))) }}/ca.pem"
+ ca_key_path: "{{ local_ca_path | default(credentials_dir + '/' + (item.0.credentials.ca_tag | default(default_ca_tag))) }}/ca_private_key.pem"
when: "item.1.changed"
loop: "{{ credentials | zip(x509_csr.results) | list }}"
register: x509_sign
- name: "Install the signed internal PKI certificates"
copy:
- dest: "/etc/credentials/x509/{{ item.0.credentials.name }}/{{ item.0.mode }}/cert.pem"
+ dest: "/etc/credentials/{{ item.0.credentials.ca_tag | default(default_ca_tag) }}/{{ item.0.credentials.name }}/{{ item.0.mode }}/cert.pem"
content: "{{ item.1.cert }}"
mode: 0644
when: "item.1.changed"
@@ -98,7 +100,7 @@
- name: "Set permissions on the private keys"
file:
- path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem"
+ path: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem"
group: "{{ item.credentials.name }}-credentials"
mode: 0640
loop: "{{ credentials }}"
@@ -112,4 +114,3 @@
rescue:
- debug:
msg: "Failed to set up one or more credentials"
-
diff --git a/float/roles/float-util-tor-exits-dataset/README.md b/float/roles/float-util-tor-exits-dataset/README.md
new file mode 100644
index 0000000..401c58a
--- /dev/null
+++ b/float/roles/float-util-tor-exits-dataset/README.md
@@ -0,0 +1,2 @@
+Role to install a cron job that periodically updates a list of Tor
+exit node IPs in /var/lib/tor-exits/exit-nodes.
diff --git a/float/roles/float-util-tor-exits-dataset/files/is-tor-exit b/float/roles/float-util-tor-exits-dataset/files/is-tor-exit
new file mode 100644
index 0000000..2a3f470
--- /dev/null
+++ b/float/roles/float-util-tor-exits-dataset/files/is-tor-exit
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+exit_nodes_file="/var/lib/tor-exits/exit-nodes"
+
+test -e ${exit_nodes_file} || exit 1
+exec grep -q "^$1\$" ${exit_nodes_file}
diff --git a/float/roles/float-util-tor-exits-dataset/files/update-tor-exits b/float/roles/float-util-tor-exits-dataset/files/update-tor-exits
new file mode 100644
index 0000000..b4132c3
--- /dev/null
+++ b/float/roles/float-util-tor-exits-dataset/files/update-tor-exits
@@ -0,0 +1,15 @@
+#!/bin/sh
+#
+# Update list of Tor exit nodes in /var/lib/tor-exits/exit-nodes.
+#
+
+url="https://www.dan.me.uk/torlist/?exit"
+output="/var/lib/tor-exits/exit-nodes"
+tmpfile="/var/lib/tor-exits/.exit-nodes.tmp"
+
+trap "rm -f ${tmpfile} 2>/dev/null" EXIT
+
+curl --silent --fail --output "${tmpfile}" "${url}" && \
+ mv -f "${tmpfile}" "${output}"
+
+exit $?
diff --git a/float/roles/float-util-tor-exits-dataset/tasks/main.yml b/float/roles/float-util-tor-exits-dataset/tasks/main.yml
new file mode 100644
index 0000000..40645f2
--- /dev/null
+++ b/float/roles/float-util-tor-exits-dataset/tasks/main.yml
@@ -0,0 +1,31 @@
+---
+
+- name: Create tor-exits state directory
+ file:
+ path: "/var/lib/tor-exits"
+ state: directory
+
+- name: Install tor-exits scripts
+ copy:
+ src: "{{ item }}"
+ dest: "/usr/local/bin/{{ item }}"
+ mode: "0755"
+ loop:
+ - "update-tor-exits"
+ - "is-tor-exit"
+
+- name: Set up tor-exits update cron job
+ copy:
+ dest: "/etc/cron.d/update-tor-exits"
+ content: "55 */3 * * * root /usr/local/bin/splay 60 && /usr/local/bin/update-tor-exits >/dev/null\n"
+
+# Run the script right away on first install. It might fail in testing
+# environments due to strict rate-limiting on the source URL.
+- stat:
+ path: "/var/lib/tor-exits/exit-nodes"
+ register: tor_exits_dataset
+
+- name: Update list of Tor exit nodes
+ command: "/usr/local/bin/update-tor-exits"
+ when: "not tor_exits_dataset.stat.exists"
+ ignore_errors: true
diff --git a/float/scripts/floatup.py b/float/scripts/floatup.py
index 465bfff..5647d7d 100755
--- a/float/scripts/floatup.py
+++ b/float/scripts/floatup.py
@@ -87,7 +87,7 @@ def encode_dashboard_request(req):
return base64.urlsafe_b64encode(comp.flush()).decode('ascii')
-def install_ssh_key():
+def install_vagrant_ssh_key():
# Install the SSH key as Vagrant would do, for compatibility.
key_path = os.path.join(
os.getenv('HOME'), '.vagrant.d', 'insecure_private_key')
@@ -134,6 +134,13 @@ def main():
'--dashboard-url', metavar='URL',
help='vmine dashboard base URL (for Gitlab CI)')
parser.add_argument(
+ '--ssh-key', metavar='FILE',
+ type=argparse.FileType('r'),
+ help='root SSH key to install on VMs')
+ parser.add_argument(
+ '--name', metavar='NAME',
+ help='group name (for named groups)')
+ parser.add_argument(
'cmd',
choices=['up', 'down'])
args = parser.parse_args()
@@ -148,6 +155,12 @@ def main():
host_attrs['image'] = args.image
req = parse_inventory(args.inventory, host_attrs)
req['ttl'] = args.ttl
+ if args.name:
+ req['name'] = args.name
+ if args.ssh_key:
+ req['ssh_key'] = args.ssh_key
+ else:
+ install_vagrant_ssh_key()
print(f'creating VM group with attrs {host_attrs} ...')
print(f'vmine request: {req}')
@@ -157,8 +170,6 @@ def main():
fd.write(group_id)
print(f'created VM group {group_id}')
- install_ssh_key()
-
if args.env:
with open(args.env, 'w') as fd:
fd.write(f'VMINE_ID={group_id}\n')
@@ -168,16 +179,22 @@ def main():
fd.write(f'VMINE_GROUP_URL={base_url}/dash/{payload}\n')
elif args.cmd == 'down':
- try:
- with open(args.state_file) as fd:
- group_id = fd.read().strip()
- except FileNotFoundError:
- print('state file not found, exiting')
- return
- print(f'stopping VM group {group_id}...')
- do_request(args.url + '/api/stop-group', args.ssh,
- {'group_id': group_id})
- os.remove(args.state_file)
+ req = {}
+ if args.name:
+ req['name'] = args.name
+ print(f'stopping VM group {args.name}...')
+ else:
+ try:
+ with open(args.state_file) as fd:
+ group_id = fd.read().strip()
+ except FileNotFoundError:
+ print('state file not found, exiting')
+ return
+ req['group_id'] = group_id
+ print(f'stopping VM group {group_id}...')
+ do_request(args.url + '/api/stop-group', args.ssh, req)
+ if args.state_file:
+ os.remove(args.state_file)
if __name__ == '__main__':
diff --git a/float/services.core.yml b/float/services.core.yml
new file mode 100644
index 0000000..2ae62b7
--- /dev/null
+++ b/float/services.core.yml
@@ -0,0 +1,325 @@
+---
+
+frontend:
+ scheduling_group: frontend
+ service_credentials:
+ - name: nginx
+ enable_server: false
+ - name: ssoproxy
+ enable_server: false
+ - name: replds-acme
+ systemd_services:
+ - nginx.service
+ - haproxy.service
+ - sso-proxy.service
+ - replds@acme.service
+ ports:
+ - 5005
+ volumes:
+ - name: cache
+ path: /var/cache/nginx
+ size: 20g
+ monitoring_endpoints:
+ - port: 8404
+ scheme: http
+
+dns:
+ scheduling_group: frontend
+ systemd_services:
+ - bind9.service
+ monitoring_endpoints:
+ - name: bind
+ port: 9119
+ scheme: http
+
+log-collector:
+ scheduling_group: backend
+ num_instances: 1
+ service_credentials:
+ - name: log-collector
+ enable_client: false
+ monitoring_endpoints:
+ - port: 9105
+ scheme: http
+ containers:
+ - name: rsyslog
+ image: registry.git.autistici.org/ai3/docker/rsyslog:master
+ ports:
+ - 6514
+ - 9105
+ volumes:
+ - /etc/rsyslog-collector.conf: /etc/rsyslog.conf
+ - /etc/rsyslog-collector-lognorm: /etc/rsyslog-collector-lognorm
+ - /var/spool/rsyslog-collector: /var/spool/rsyslog
+ - /var/log/remote: /var/log/remote
+ egress_policy: internal
+ ports:
+ - 6514
+
+prometheus:
+ scheduling_group: backend
+ num_instances: 1
+ service_credentials:
+ - { name: prometheus }
+ containers:
+ - name: prometheus
+ image: registry.git.autistici.org/ai3/docker/prometheus:master
+ port: 9090
+ volumes:
+ - /etc/prometheus: /etc/prometheus
+ - /var/lib/prometheus/metrics2: /var/lib/prometheus/metrics2
+ args: "--storage.tsdb.retention.time={{ prometheus_tsdb_retention | default('90d') }} --web.external-url=https://monitor.{{ domain_public[0] }} --web.enable-lifecycle --query.max-samples={{ prometheus_max_samples | default('5000000') }}"
+ - name: alertmanager
+ image: registry.git.autistici.org/ai3/docker/prometheus-alertmanager:master
+ ports:
+ - 9093
+ - 9094
+ volumes:
+ - /etc/prometheus: /etc/prometheus
+ - /var/lib/prometheus/alertmanager: /var/lib/prometheus/alertmanager
+ args: "--web.external-url=https://alertmanager.{{ domain_public[0] }} --cluster.listen-address=:9094 --cluster.advertise-address={{ float_host_dns_map.get(inventory_hostname + '.prometheus', ['']) | list | first }}:9094{% for h in groups['prometheus']|sort if h != inventory_hostname %} --cluster.peer={{ h }}.prometheus.{{ domain }}:9094{% endfor %}"
+ - name: blackbox
+ image: registry.git.autistici.org/ai3/docker/prometheus-blackbox:master
+ ports:
+ - 9115
+ volumes:
+ - /etc/prometheus: /etc/prometheus
+ args: "--config.file /etc/prometheus/blackbox.yml"
+ docker_options: "--cap-add=NET_RAW"
+ drop_capabilities: false
+ - name: grafana
+ image: registry.git.autistici.org/ai3/docker/grafana:master
+ port: 2929
+ volumes:
+ - /etc/grafana: /etc/grafana
+ - /var/lib/grafana: /var/lib/grafana
+ egress_policy: internal
+ - name: thanos
+ image: registry.git.autistici.org/ai3/docker/thanos:master
+ ports:
+ - 10901 # sidecar grpc
+ - 10902 # sidecar http
+ - 10903 # query grpc
+ - 10904 # query http
+ - 10905 # query-frontend grpc
+ - 10906 # query-frontend http
+ resources:
+ ram: "1G"
+ env:
+ QUERY_FLAGS: "--query.replica-label=monitor {% for h in groups['prometheus']|sort %} --store={{ h }}.prometheus.{{ domain }}:10901{% endfor %}"
+ SIDECAR_FLAGS: ""
+ QUERY_FRONTEND_FLAGS: "--query-range.response-cache-config-file=/etc/thanos/query-frontend-cache.yml"
+ volumes:
+ - /etc/thanos: /etc/thanos
+ egress_policy: internal
+ - name: karma
+ image: registry.git.autistici.org/ai3/docker/karma:master
+ ports:
+ - 9193
+ env:
+ # https://github.com/prymitive/karma/blob/master/docs/CONFIGURATION.md#environment-variables
+ CONFIG_FILE: "/etc/karma/float.yml"
+ PORT: 9193
+ volumes:
+ - /etc/karma: /etc/karma
+ egress_policy: internal
+ public_endpoints:
+ - name: monitor
+ port: 9090
+ scheme: http
+ enable_sso_proxy: true
+ - name: prober
+ port: 9115
+ scheme: http
+ enable_sso_proxy: true
+ - name: grafana
+ port: 2929
+ scheme: https
+ enable_sso_proxy: true
+ - name: thanos
+ port: 10906
+ scheme: http
+ enable_sso_proxy: true
+ - name: alerts
+ port: 9193
+ scheme: http
+ enable_sso_proxy: true
+ monitoring_endpoints:
+ - port: 9090
+ scheme: http
+ healthcheck_http_method: OPTIONS
+ - port: 9093
+ scheme: http
+ healthcheck_http_method: OPTIONS
+ - port: 9193
+ scheme: http
+ healthcheck_http_method: GET
+ - port: 2929
+ scheme: https
+ - port: 10904
+ scheme: http
+ - port: 10902
+ scheme: http
+ - port: 10906
+ scheme: http
+ ports:
+ - 9094
+ - 10901
+ volumes:
+ - name: metrics
+ path: /var/lib/prometheus
+ owner: docker-prometheus
+ group: docker-prometheus
+ mode: "0755"
+ annotations:
+ dependencies:
+ - client: prometheus
+ server: alertmanager
+ - client: karma
+ server: alertmanager
+ - client: thanos
+ server: prometheus
+
+sso-server:
+ num_instances: 1
+ scheduling_group: backend
+ service_credentials:
+ - name: sso-server
+ enable_server: false
+ public_endpoints:
+ - name: login
+ port: 5002
+ scheme: http
+ monitoring_endpoints:
+ - port: 5002
+ scheme: http
+ systemd_services:
+ - sso-server.service
+ annotations:
+ dependencies:
+ - client: sso-server
+ server: user-meta-server/user-meta-server
+
+auth-cache:
+ scheduling_group: backend
+ containers:
+ - name: memcache
+ image: registry.git.autistici.org/ai3/docker/memcached:master
+ ports:
+ - 11212
+ - 11213
+ env:
+ PORT: "11212"
+ egress_policy: internal
+ ports:
+ - 11212
+ monitoring_endpoints:
+ - port: 11213
+ scheme: http
+
+user-meta-server:
+ num_instances: 1
+ scheduling_group: backend
+ service_credentials:
+ - name: user-meta-server
+ monitoring_endpoints:
+ - port: 5505
+ scheme: https
+ ports:
+ - 5505
+ systemd_services:
+ - user-meta-server.service
+ datasets:
+ - name: db
+ type: litestream
+ path: /var/lib/user-meta-server
+ filename: usermeta.db
+ owner: user-meta-server
+ litestream_params:
+ sync-interval: "60s"
+
+service-dashboard:
+ scheduling_group: frontend
+ service_credentials:
+ - name: service-dashboard
+ containers:
+ - name: http
+ image: registry.git.autistici.org/ai3/tools/float-dashboard:master
+ port: 8011
+ volumes:
+ - /etc/float: /etc/float
+ env:
+ ADDR: ":8011"
+ DOMAIN: "{{ domain_public[0] }}"
+ egress_policy: internal
+ public_endpoints:
+ - name: service-dashboard
+ port: 8011
+ scheme: http
+ enable_sso_proxy: true
+
+backup-metadata:
+ num_instances: 1
+ scheduling_group: backend
+ service_credentials:
+ - name: backup-metadata
+ enable_client: false
+ monitoring_endpoints:
+ - port: 5332
+ scheme: https
+ public_endpoints:
+ - name: backups
+ port: 5332
+ scheme: https
+ enable_sso_proxy: true
+ ports:
+ - 5332
+ systemd_services:
+ - tabacco-metadb.service
+ datasets:
+ - name: db
+ type: litestream
+ path: /var/lib/tabacco-metadb
+ filename: meta.db
+ owner: backup-metadata
+
+acme:
+ num_instances: 1
+ scheduling_group: frontend
+ service_credentials:
+ - name: acme
+ enable_server: false
+ monitoring_endpoints:
+ - port: 5004
+ scheme: http
+ ports:
+ - 5004
+ systemd_services:
+ - acmeserver.service
+
+assets:
+ num_instances: 1
+ scheduling_group: backend
+ service_credentials:
+ - name: assetmon
+ containers:
+ - name: http
+ image: registry.git.autistici.org/ai3/tools/assetmon:master
+ volumes:
+ - /etc/assetmon/server.yml: /etc/assetmon/server.yml
+ - /var/lib/assetmon: /var/lib/assetmon
+ ports:
+ - 3798
+ egress_policy: internal
+ monitoring_endpoints:
+ - port: 3798
+ scheme: https
+ public_endpoints:
+ - name: assets
+ port: 3798
+ scheme: https
+ enable_sso_proxy: true
+ datasets:
+ - name: db
+ path: /var/lib/assetmon
+ owner: docker-assets
diff --git a/float/services.default.yml b/float/services.default.yml
new file mode 100644
index 0000000..7d07664
--- /dev/null
+++ b/float/services.default.yml
@@ -0,0 +1,105 @@
+---
+
+include:
+ - "services.core.yml"
+
+reports-collector:
+ scheduling_group: frontend
+ containers:
+ - name: http
+ image: registry.git.autistici.org/ai3/tools/reports-collector:master
+ ports:
+ - 3995
+ - 3996
+ env:
+ ADDR: ":3995"
+ SMTP_ADDR: ":3996"
+ volumes:
+ - /var/lib/GeoIP: /var/lib/GeoIP
+ public_endpoints:
+ - name: live-reports
+ port: 3995
+ scheme: http
+ monitoring_endpoints:
+ - port: 3995
+ scheme: http
+ ports:
+ - 3996
+
+log-collector:
+ scheduling_group: backend
+ num_instances: 1
+ service_credentials:
+ - name: log-collector
+ enable_client: false
+ monitoring_endpoints:
+ - port: 9105
+ scheme: http
+ - port: 9201
+ scheme: http
+ public_endpoints:
+ - name: logs
+ port: 5601
+ scheme: http
+ enable_sso_proxy: true
+ containers:
+ - name: rsyslog
+ image: registry.git.autistici.org/ai3/docker/rsyslog:master
+ ports:
+ - 6514
+ - 9105
+ volumes:
+ - /etc/rsyslog-collector.conf: /etc/rsyslog.conf
+ - /etc/rsyslog-collector: /etc/rsyslog-collector
+ - /etc/rsyslog-collector-lognorm: /etc/rsyslog-collector-lognorm
+ - /var/spool/rsyslog-collector: /var/spool/rsyslog
+ - /var/log/remote: /var/log/remote
+ egress_policy: internal
+ - name: kibana
+ image: registry.git.autistici.org/ai3/docker/kibana:master
+ port: 5601
+ volumes:
+ - /etc/kibana: /etc/kibana
+ - /var/lib/kibana: /var/lib/kibana
+ env:
+ BABEL_CACHE_PATH: "/var/lib/kibana/.babelcache.json"
+ - name: elasticsearch
+ image: registry.git.autistici.org/ai3/docker/elasticsearch:master
+ port: 9200
+ volumes:
+ - /etc/elasticsearch: /etc/elasticsearch
+ - /var/lib/elasticsearch: /var/lib/elasticsearch
+ - /var/log/elasticsearch: /var/log/elasticsearch
+ env:
+ PORT: 9200
+ EXPORTER_PORT: 9201
+ ports:
+ - 6514
+ - 9200
+ volumes:
+ - name: elasticsearch
+ path: /var/lib/elasticsearch
+ size: 100g
+ owner: docker-log-collector
+ group: docker-log-collector
+ mode: "0700"
+ annotations:
+ dependencies:
+ - client: kibana
+ server: elasticsearch
+ - client: log-collector-e2e/prober
+ server: elasticsearch
+
+log-collector-e2e:
+ scheduling_group: all
+ containers:
+ - name: prober
+ image: registry.git.autistici.org/ai3/tools/dye-injector:master
+ port: 7094
+ env:
+ ADDR: ":7094"
+ monitoring_endpoints:
+ - name: log-collector-e2e-prober
+ port: 7094
+ scheme: http
+
diff --git a/float/services.yml.default b/float/services.yml.default
index c7c67a6..77b8ad4 100644..120000
--- a/float/services.yml.default
+++ b/float/services.yml.default
@@ -1,105 +1 @@
----
-
-include:
- - "services.yml.no-elasticsearch"
-
-reports-collector:
- scheduling_group: frontend
- containers:
- - name: http
- image: registry.git.autistici.org/ai3/tools/reports-collector:master
- ports:
- - 3995
- - 3996
- env:
- ADDR: ":3995"
- SMTP_ADDR: ":3996"
- volumes:
- - /var/lib/GeoIP: /var/lib/GeoIP
- public_endpoints:
- - name: live-reports
- port: 3995
- scheme: http
- monitoring_endpoints:
- - port: 3995
- scheme: http
- ports:
- - 3996
-
-log-collector:
- scheduling_group: backend
- num_instances: 1
- service_credentials:
- - name: log-collector
- enable_client: false
- monitoring_endpoints:
- - port: 9105
- scheme: http
- - port: 9201
- scheme: http
- public_endpoints:
- - name: logs
- port: 5601
- scheme: http
- enable_sso_proxy: true
- containers:
- - name: rsyslog
- image: registry.git.autistici.org/ai3/docker/rsyslog:master
- ports:
- - 6514
- - 9105
- volumes:
- - /etc/rsyslog-collector.conf: /etc/rsyslog.conf
- - /etc/rsyslog-collector: /etc/rsyslog-collector
- - /etc/rsyslog-collector-lognorm: /etc/rsyslog-collector-lognorm
- - /var/spool/rsyslog-collector: /var/spool/rsyslog
- - /var/log/remote: /var/log/remote
- egress_policy: internal
- - name: kibana
- image: registry.git.autistici.org/ai3/docker/kibana:master
- port: 5601
- volumes:
- - /etc/kibana: /etc/kibana
- - /var/lib/kibana: /var/lib/kibana
- env:
- BABEL_CACHE_PATH: "/var/lib/kibana/.babelcache.json"
- - name: elasticsearch
- image: registry.git.autistici.org/ai3/docker/elasticsearch:master
- port: 9200
- volumes:
- - /etc/elasticsearch: /etc/elasticsearch
- - /var/lib/elasticsearch: /var/lib/elasticsearch
- - /var/log/elasticsearch: /var/log/elasticsearch
- env:
- PORT: 9200
- EXPORTER_PORT: 9201
- ports:
- - 6514
- - 9200
- volumes:
- - name: elasticsearch
- path: /var/lib/elasticsearch
- size: 100g
- owner: docker-log-collector
- group: docker-log-collector
- mode: "0700"
- annotations:
- dependencies:
- - client: kibana
- server: elasticsearch
- - client: log-collector-e2e/prober
- server: elasticsearch
-
-log-collector-e2e:
- scheduling_group: all
- containers:
- - name: prober
- image: registry.git.autistici.org/ai3/tools/dye-injector:master
- port: 7094
- env:
- ADDR: ":7094"
- monitoring_endpoints:
- - name: log-collector-e2e-prober
- port: 7094
- scheme: http
-
+services.default.yml
\ No newline at end of file
diff --git a/float/services.yml.no-elasticsearch b/float/services.yml.no-elasticsearch
index 3665352..3685dcf 100644..120000
--- a/float/services.yml.no-elasticsearch
+++ b/float/services.yml.no-elasticsearch
@@ -1,318 +1 @@
----
-
-frontend:
- scheduling_group: frontend
- service_credentials:
- - name: nginx
- enable_server: false
- - name: ssoproxy
- enable_server: false
- - name: replds-acme
- systemd_services:
- - nginx.service
- - haproxy.service
- - sso-proxy.service
- - replds@acme.service
- ports:
- - 5005
- volumes:
- - name: cache
- path: /var/cache/nginx
- size: 20g
- monitoring_endpoints:
- - port: 8404
- scheme: http
-
-dns:
- scheduling_group: frontend
- systemd_services:
- - bind9.service
- monitoring_endpoints:
- - name: bind
- port: 9119
- scheme: http
-
-log-collector:
- scheduling_group: backend
- num_instances: 1
- service_credentials:
- - name: log-collector
- enable_client: false
- monitoring_endpoints:
- - port: 9105
- scheme: http
- containers:
- - name: rsyslog
- image: registry.git.autistici.org/ai3/docker/rsyslog:master
- ports:
- - 6514
- - 9105
- volumes:
- - /etc/rsyslog-collector.conf: /etc/rsyslog.conf
- - /etc/rsyslog-collector-lognorm: /etc/rsyslog-collector-lognorm
- - /var/spool/rsyslog-collector: /var/spool/rsyslog
- - /var/log/remote: /var/log/remote
- egress_policy: internal
- ports:
- - 6514
-
-prometheus:
- scheduling_group: backend
- num_instances: 1
- service_credentials:
- - { name: prometheus }
- containers:
- - name: prometheus
- image: registry.git.autistici.org/ai3/docker/prometheus:master
- port: 9090
- volumes:
- - /etc/prometheus: /etc/prometheus
- - /var/lib/prometheus/metrics2: /var/lib/prometheus/metrics2
- args: "--storage.tsdb.retention.time={{ prometheus_tsdb_retention | default('90d') }} --web.external-url=https://monitor.{{ domain_public[0] }} --web.enable-lifecycle --query.max-samples={{ prometheus_max_samples | default('5000000') }}"
- - name: alertmanager
- image: registry.git.autistici.org/ai3/docker/prometheus-alertmanager:master
- ports:
- - 9093
- - 9094
- volumes:
- - /etc/prometheus: /etc/prometheus
- - /var/lib/prometheus/alertmanager: /var/lib/prometheus/alertmanager
- args: "--web.external-url=https://alertmanager.{{ domain_public[0] }} --cluster.listen-address=:9094 --cluster.advertise-address={{ float_host_dns_map.get(inventory_hostname + '.prometheus', ['']) | list | first }}:9094{% for h in groups['prometheus']|sort if h != inventory_hostname %} --cluster.peer={{ h }}.prometheus.{{ domain }}:9094{% endfor %}"
- - name: blackbox
- image: registry.git.autistici.org/ai3/docker/prometheus-blackbox:master
- ports:
- - 9115
- volumes:
- - /etc/prometheus: /etc/prometheus
- args: "--config.file /etc/prometheus/blackbox.yml"
- docker_options: "--cap-add=NET_RAW"
- drop_capabilities: false
- - name: grafana
- image: registry.git.autistici.org/ai3/docker/grafana:master
- port: 2929
- volumes:
- - /etc/grafana: /etc/grafana
- - /var/lib/grafana: /var/lib/grafana
- egress_policy: internal
- - name: thanos
- image: registry.git.autistici.org/ai3/docker/thanos:master
- ports:
- - 10901 # sidecar grpc
- - 10902 # sidecar http
- - 10903 # query grpc
- - 10904 # query http
- - 10905 # query-frontend grpc
- - 10906 # query-frontend http
- resources:
- ram: "1G"
- env:
- QUERY_FLAGS: "--query.replica-label=monitor {% for h in groups['prometheus']|sort %} --store={{ h }}.prometheus.{{ domain }}:10901{% endfor %}"
- SIDECAR_FLAGS: ""
- QUERY_FRONTEND_FLAGS: "--query-range.response-cache-config-file=/etc/thanos/query-frontend-cache.yml"
- volumes:
- - /etc/thanos: /etc/thanos
- egress_policy: internal
- - name: karma
- image: registry.git.autistici.org/ai3/docker/karma:master
- ports:
- - 9193
- env:
- # https://github.com/prymitive/karma/blob/master/docs/CONFIGURATION.md#environment-variables
- CONFIG_FILE: "/etc/karma/float.yml"
- PORT: 9193
- volumes:
- - /etc/karma: /etc/karma
- egress_policy: internal
- public_endpoints:
- - name: monitor
- port: 9090
- scheme: http
- enable_sso_proxy: true
- - name: prober
- port: 9115
- scheme: http
- enable_sso_proxy: true
- - name: grafana
- port: 2929
- scheme: https
- enable_sso_proxy: true
- - name: thanos
- port: 10906
- scheme: http
- enable_sso_proxy: true
- - name: alerts
- port: 9193
- scheme: http
- enable_sso_proxy: true
- monitoring_endpoints:
- - port: 9090
- scheme: http
- healthcheck_http_method: OPTIONS
- - port: 9093
- scheme: http
- healthcheck_http_method: OPTIONS
- - port: 9193
- scheme: http
- healthcheck_http_method: GET
- - port: 2929
- scheme: https
- - port: 10904
- scheme: http
- - port: 10902
- scheme: http
- - port: 10906
- scheme: http
- ports:
- - 9094
- - 10901
- volumes:
- - name: metrics
- path: /var/lib/prometheus
- owner: docker-prometheus
- group: docker-prometheus
- mode: "0755"
- annotations:
- dependencies:
- - client: prometheus
- server: alertmanager
- - client: karma
- server: alertmanager
- - client: thanos
- server: prometheus
-
-sso-server:
- num_instances: 1
- scheduling_group: backend
- service_credentials:
- - name: sso-server
- enable_server: false
- public_endpoints:
- - name: login
- port: 5002
- scheme: http
- monitoring_endpoints:
- - port: 5002
- scheme: http
- systemd_services:
- - sso-server.service
- annotations:
- dependencies:
- - client: sso-server
- server: user-meta-server/user-meta-server
-
-auth-cache:
- scheduling_group: backend
- containers:
- - name: memcache
- image: registry.git.autistici.org/ai3/docker/memcached:master
- port: 11212
- env:
- PORT: "11212"
- egress_policy: internal
- ports:
- - 11212
-
-user-meta-server:
- num_instances: 1
- scheduling_group: backend
- service_credentials:
- - name: user-meta-server
- monitoring_endpoints:
- - port: 5505
- scheme: https
- ports:
- - 5505
- systemd_services:
- - user-meta-server.service
- datasets:
- - name: db
- type: litestream
- path: /var/lib/user-meta-server
- filename: usermeta.db
- owner: user-meta-server
-
-admin-dashboard:
- scheduling_group: frontend
- service_credentials:
- - name: admin-dashboard
- containers:
- - name: http
- image: registry.git.autistici.org/ai3/tools/float-dashboard:master
- port: 8011
- volumes:
- - /etc/float: /etc/float
- env:
- ADDR: ":8011"
- DOMAIN: "{{ domain_public[0] }}"
- egress_policy: internal
- public_endpoints:
- - name: admin
- port: 8011
- scheme: http
- enable_sso_proxy: true
-
-backup-metadata:
- num_instances: 1
- scheduling_group: backend
- service_credentials:
- - name: backup-metadata
- enable_client: false
- monitoring_endpoints:
- - port: 5332
- scheme: https
- public_endpoints:
- - name: backups
- port: 5332
- scheme: https
- enable_sso_proxy: true
- ports:
- - 5332
- systemd_services:
- - tabacco-metadb.service
- datasets:
- - name: db
- type: litestream
- path: /var/lib/tabacco-metadb
- filename: meta.db
- owner: backup-metadata
-
-acme:
- num_instances: 1
- scheduling_group: frontend
- service_credentials:
- - name: acme
- enable_server: false
- monitoring_endpoints:
- - port: 5004
- scheme: http
- ports:
- - 5004
- systemd_services:
- - acmeserver.service
-
-assets:
- num_instances: 1
- scheduling_group: backend
- service_credentials:
- - name: assetmon
- containers:
- - name: http
- image: registry.git.autistici.org/ai3/tools/assetmon:master
- volumes:
- - /etc/assetmon/server.yml: /etc/assetmon/server.yml
- - /var/lib/assetmon: /var/lib/assetmon
- ports:
- - 3798
- egress_policy: internal
- monitoring_endpoints:
- - port: 3798
- scheme: https
- public_endpoints:
- - name: assets
- port: 3798
- scheme: https
- enable_sso_proxy: true
- datasets:
- - name: db
- path: /var/lib/assetmon
- owner: docker-assets
+services.core.yml
\ No newline at end of file
diff --git a/float/test-driver b/float/test-driver
index 12e943b..863c717 100755
--- a/float/test-driver
+++ b/float/test-driver
@@ -56,7 +56,7 @@ save_logs() {
log "Saving logs from VMs (if any)"
mkdir -p "${out_dir}"
ANSIBLE_STDOUT_CALLBACK=null \
- ${float_dir}/float run -e "callback_whitelist=" -e "out_dir=${out_dir}" \
+ ${float_dir}/float run -e "callbacks_enabled=" -e "out_dir=${out_dir}" \
${float_dir}/test/save-logs.yml
}
@@ -92,9 +92,12 @@ Commands:
cleanup Cleanup the test environment (turn down VMs, etc)
- run Run the main float playbook, and the test suite, using
- the playbooks specified by the remaining command-line
- arguments
+ provision Run the main float playbook
+
+ test Run the test suite, using the playbooks specified by
+ the remaining command-line arguments
+
+ run (deprecated) Run 'provision' and 'test'
If DIR is specified, chdir there before running anything.
@@ -119,6 +122,20 @@ if [ $# -gt 0 ]; then
shift
fi
+do_provision() {
+ log Running main playbook
+ ${float_dir}/float run site.yml \
+ || die "failed to run the main playbook"
+}
+
+do_test() {
+ for playbook in "${float_dir}/test/integration-test.yml" "$@"; do
+ log Running test playbook ${playbook}
+ ${float_dir}/float run ${playbook} \
+ || die "test playbook failed"
+ done
+}
+
case "$cmd" in
init)
run_init
@@ -134,16 +151,18 @@ case "$cmd" in
run_cleanup
;;
+ provision)
+ do_provision
+ ;;
+
+ test)
+ do_test "$@"  # forward the extra playbooks; inside do_test "$@" is the function's own argument list
+ ;;
+
+ # Legacy command to run both 'provision' and 'test' at once.
run)
- log Running main playbook
- ${float_dir}/float run site.yml \
- || die "failed to run the main playbook"
-
- for playbook in "${float_dir}/test/integration-test.yml" "$@"; do
- log Running test playbook ${playbook}
- ${float_dir}/float run ${playbook} \
- || die "test playbook failed"
- done
+ do_provision
+ do_test "$@"  # keep the legacy behaviour: extra playbooks from the command line still run
;;
*)
diff --git a/float/test/README.md b/float/test/README.md
index 82d8db7..86a856f 100644
--- a/float/test/README.md
+++ b/float/test/README.md
@@ -135,8 +135,8 @@ clear speed improvement.
This is, in fact, already the default for test environments created
with "float create-env" and it can be achieved by:
-* importing "services.yml.no-elasticsearch" instead of
- "services.yml.default" for the float built-in service definitions;
+* importing "services.core.yml" instead of
+ "services.default.yml" for the float built-in service definitions;
* setting the configuration variable "enable_elasticsearch" to false.
The resulting environment will still run the *log-collector* service,
diff --git a/float/test/backup.ref/services.yml b/float/test/backup.ref/services.yml
index 8db1002..48d3bf3 100644
--- a/float/test/backup.ref/services.yml
+++ b/float/test/backup.ref/services.yml
@@ -1,7 +1,7 @@
---
include:
- - "../../services.yml.no-elasticsearch"
+ - "../../services.core.yml"
ok:
scheduling_group: backend
@@ -10,7 +10,7 @@ ok:
image: registry.git.autistici.org/ai3/docker/okserver:latest
port: 3100
env:
- PORT: 3100
+ ADDR: ":3100"
resources:
ram: 1g
cpu: 0.5
diff --git a/float/test/backup.ref/site.yml b/float/test/backup.ref/site.yml
index 601f945..7d5a194 100644
--- a/float/test/backup.ref/site.yml
+++ b/float/test/backup.ref/site.yml
@@ -7,4 +7,11 @@
- name: Create the test bucket
run_once: true
command: "podman run --env MC_HOST_local=http://minio:miniopassword@backup:9000 --network host --rm quay.io/minio/mc mb local/backuptest"
+ - systemd:
+ name: tabacco-metadb.service
+ state: restarted
+ - systemd:
+ name: user-meta-server.service
+ state: restarted
+ - local_action: shell sleep 60
diff --git a/float/test/base.ref/services.yml b/float/test/base.ref/services.yml
index 0ed283e..ce0e7a8 100644
--- a/float/test/base.ref/services.yml
+++ b/float/test/base.ref/services.yml
@@ -32,7 +32,7 @@ ok:
image: registry.git.autistici.org/ai3/docker/okserver:latest
port: 3100
env:
- PORT: 3100
+ ADDR: ":3100"
public_endpoints:
- name: ok
port: 3100
diff --git a/float/test/float_integration_test/http.py b/float/test/float_integration_test/http.py
index 261aceb..c82ca7e 100644
--- a/float/test/float_integration_test/http.py
+++ b/float/test/float_integration_test/http.py
@@ -1,3 +1,4 @@
+import html
import http.cookiejar
import http.client
import os
@@ -40,10 +41,10 @@ class SSOHandler(urllib.request.BaseHandler):
self._login_form_url = login_server.rstrip('/') + '/login'
self._auth_notify_cb = auth_notify_cb
- def _extract_hidden_form_data(self, html):
+ def _extract_hidden_form_data(self, body):
form = {}
- for name, value in self._form_pattern.findall(html):
- form[name] = value
+ for name, value in self._form_pattern.findall(body):
+ form[name] = html.unescape(value)
return form
def https_response(self, req, resp):
@@ -101,7 +102,8 @@ def _build_opener(ipaddr, follow_redirects=False, *extra_handlers):
# Create a tolerant SSL context that accepts the self-signed
# certificates used by the testing environment.
ssl_context = ssl.create_default_context(
- ssl.Purpose.CLIENT_AUTH)
+ purpose=ssl.Purpose.SERVER_AUTH)
+ ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
debuglevel = 1 if os.getenv('HTTP_TRACE') else 0
diff --git a/float/test/float_integration_test/test_system.py b/float/test/float_integration_test/test_system.py
index d07823c..7b08887 100644
--- a/float/test/float_integration_test/test_system.py
+++ b/float/test/float_integration_test/test_system.py
@@ -43,7 +43,7 @@ class TestBuiltinServiceURLs(URLTestBase):
"""Verify that all the public_endpoints are reachable.
Tests will only run if the corresponding service (from
- services.yml.default) is actually enabled.
+ services.yml) is actually enabled.
"""
@@ -57,8 +57,8 @@ class TestBuiltinServiceURLs(URLTestBase):
def test_okserver(self):
self.assert_endpoint_ok_if_enabled('ok', 'ok')
- def test_admin_dashboard(self):
- self.assert_endpoint_ok_if_enabled('admin-dashboard', 'admin', True)
+ def test_service_dashboard(self):
+ self.assert_endpoint_ok_if_enabled('service-dashboard', 'service-dashboard', True)
def test_monitor(self):
self.assert_endpoint_ok_if_enabled('prometheus', 'monitor', True)
diff --git a/float/test/full.ref/services.yml b/float/test/full.ref/services.yml
index 893079d..95fcd23 100644
--- a/float/test/full.ref/services.yml
+++ b/float/test/full.ref/services.yml
@@ -1,7 +1,7 @@
---
include:
- - "../../services.yml.no-elasticsearch"
+ - "../../services.core.yml"
ok:
scheduling_group: backend
@@ -10,7 +10,7 @@ ok:
image: registry.git.autistici.org/ai3/docker/okserver:latest
port: 3100
env:
- PORT: 3100
+ ADDR: ":3100"
resources:
ram: 1g
cpu: 0.5
@@ -27,7 +27,7 @@ ok-root:
root: true
port: 799
env:
- PORT: 799
+ ADDR: ":799"
resources:
ram: 1g
cpu: 0.5