diff options
author | kwadronaut <kwadronaut@autistici.org> | 2024-02-05 12:52:32 +0000 |
---|---|---|
committer | kwadronaut <kwadronaut@autistici.org> | 2024-02-05 12:52:32 +0000 |
commit | 354794a833750a4ac2e90e20fa141e23c3cce20c (patch) | |
tree | 67c3cd1100f99d4291c9f2d58ccbb4f8b9c07888 | |
parent | c6d787af527667d24631d61f7b3050d25cce8139 (diff) | |
parent | c36242b3fea95e7e4883157c25d31bb333edae0d (diff) |
Merge branch 'feat/upgrade-float' into 'main'
#70 git subrepo pull --force float
See merge request leap/container-platform/lilypad!74
103 files changed, 2295 insertions, 2795 deletions
diff --git a/float/.gitlab-ci.yml b/float/.gitlab-ci.yml index 7e93b27..3453101 100644 --- a/float/.gitlab-ci.yml +++ b/float/.gitlab-ci.yml @@ -96,23 +96,29 @@ stop_full_test_review: - if: $CI_MERGE_REQUEST_ID when: manual -backup_test: +#backup_test: +# <<: *base_test +# variables: +# VM_IMAGE: "bullseye" +# CREATE_ENV_VARS: "--additional-config test/backup.ref/config-backup.yml --playbook test/backup.ref/site.yml" +# TEST_DIR: "test/backup.ref" + +bookworm_test: <<: *base_test variables: - VM_IMAGE: "bullseye" - CREATE_ENV_VARS: "--additional-config test/backup.ref/config-backup.yml --playbook test/backup.ref/site.yml" - TEST_DIR: "test/backup.ref" + VM_IMAGE: "bookworm" + CREATE_ENV_VARS: "-e config.float_debian_dist=bookworm" + TEST_DIR: "test/full.ref" docker_build_and_release_tests: stage: docker_build - image: docker:latest - services: - - docker:dind + image: quay.io/podman/stable + tags: [podman] script: - - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY - - cd test && docker build --build-arg ci_token=$CI_JOB_TOKEN --pull -t $IMAGE_TAG . - - docker tag $IMAGE_TAG $CI_REGISTRY_IMAGE:integration-test - - docker push $CI_REGISTRY_IMAGE:integration-test + - echo -n "$CI_JOB_TOKEN" | podman login -u gitlab-ci-token --password-stdin $CI_REGISTRY + - cd test && podman build --build-arg ci_token=$CI_JOB_TOKEN --pull -t $IMAGE_TAG . + - podman tag $IMAGE_TAG $CI_REGISTRY_IMAGE:integration-test + - podman push $CI_REGISTRY_IMAGE:integration-test only: changes: - test/float_integration_test/** diff --git a/float/.gitrepo b/float/.gitrepo index a9d1600..88bd8cc 100644 --- a/float/.gitrepo +++ b/float/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://git.autistici.org/ai3/float.git branch = master - commit = c2c4ad89ae7ad3f1f722bf4528e1057c377e2886 - parent = d9b2f97887292b92babad1990bd760c23e952416 - cmdver = 0.4.3 + commit = b02a3496b03886b59f9b1fd6c197d06c4a9ce66d + parent = c6d787af527667d24631d61f7b3050d25cce8139 + cmdver = 0.4.6 method = merge diff --git a/float/ci/deploy.yml b/float/ci/deploy.yml index 04354f1..b0d3d1a 100644 --- a/float/ci/deploy.yml +++ b/float/ci/deploy.yml @@ -54,7 +54,7 @@ variables: - > $BUILD_DIR/float/float create-env --domain=${DOMAIN} - --services=${BUILD_DIR}/float/services.yml.no-elasticsearch + --services=${BUILD_DIR}/float/services.core.yml --services=${SERVICES_FILE} --passwords=${BUILD_DIR}/float/passwords.yml.default --passwords=${PASSWORDS_FILE} diff --git a/float/docs/quickstart.it.md b/float/docs/quickstart.it.md index 72b17ac..59316ee 100644 --- a/float/docs/quickstart.it.md +++ b/float/docs/quickstart.it.md @@ -31,8 +31,8 @@ dipendenze possono essere installate con questo comando: ```shell sudo apt install golang ansible vagrant -go get -u git.autistici.org/ale/x509ca -go get -u git.autistici.org/ale/ed25519gen +go install git.autistici.org/ale/x509ca@latest +go install git.autistici.org/ale/ed25519gen@latest export PATH=$PATH:$HOME/go/bin ``` diff --git a/float/docs/quickstart.md b/float/docs/quickstart.md index 58b0b6a..f09a644 100644 --- a/float/docs/quickstart.md +++ b/float/docs/quickstart.md @@ -35,8 +35,8 @@ other dependencies can be installed with the following commands: ```shell sudo apt install golang ansible vagrant -go get -u git.autistici.org/ale/x509ca -go get -u git.autistici.org/ale/ed25519gen +go install git.autistici.org/ale/x509ca@latest +go install git.autistici.org/ale/ed25519gen@latest export PATH=$PATH:$HOME/go/bin ``` @@ -106,7 +106,7 @@ files for Ansible and Vagrant, with default values filled in by *create-env* automatically generates a default *admin* user, with password *password*. -You can read the [configuration reference](configuration.md) for +You can read the [configuration reference](reference.md) for details on the configuration file syntax and what the various options mean. diff --git a/float/docs/reference.md b/float/docs/reference.md index d588cf7..b088a68 100644 --- a/float/docs/reference.md +++ b/float/docs/reference.md @@ -164,7 +164,7 @@ proxy layer in the picture, the conceptual flow is quite simple: ``` reverse proxy | - V + V apache | V @@ -331,7 +331,7 @@ Float offers two backup mechanisms for datasets: favor correctness over availability, is also in the works. This backup mechanism is *extensible* to understand the structure and metadata of specific services' entities and accounts, if necessary. - + * There are a number of instances, in float, of a specific category of service, single-hosted small API services that run off a simple SQLite database, some of which are critical to float's operation @@ -568,7 +568,7 @@ Ansible roles to configure them. Note that, in its default setup, float will naturally assume a two-tier service topology, with "frontend" hosts handling traffic routing in a stateless fashion, and "backend" hosts running the actual -services. The default *services.yml.default* service description file +services. The default *services.default.yml* service description file literally expects the *frontend* and *backend* Ansible groups to be defined in your inventory. However, these are just roles, and there is nothing inherent in float that limits you to this kind of topology. @@ -1575,6 +1575,8 @@ provided: specify a regex (with a capture group) to extract back the host name from the target; the default regex will extract the short host name from URLs and host:port targets. +* (optionally) a *scrape_interval* if for some reason it should be + different than the default *prometheus_probe_scrape_interval*. So, in the context of the previous example, if we wanted to probe another float service called *myservice*, which hypothetically serves @@ -1829,7 +1831,7 @@ pairs that define group variables. ### Groups While you can define any host groups you want, the default service -configuration in float (*services.yml.default*) expects you to define +configuration in float (*services.default.yml*) expects you to define at least two: * *frontend*, for the public-facing reverse proxy hosts @@ -1922,12 +1924,12 @@ Service metadata is encoded as a dictionary of *service name*: Metadata for services that are part of the core infrastructure ships embedded with this repository, so when writing your own `services.yml` file, you only need to add your services to it. You should include the -*services.yml.default* file shipped with the float source, which +*services.default.yml* file shipped with the float source, which defines all the built-in services: ```yaml include: - - "/path/to/float/services.yml.default" + - "/path/to/float/services.default.yml" ``` The `include` directive is special: it does not define a service, but @@ -2032,6 +2034,9 @@ attributes: `labels`: An optional dictionary of key/value labels to set for this target (they will be added to all metrics scraped from it). +`scrape_interval`: Optionally override the scrape interval for this +target. + The Prometheus *job* labels for service targets will be automatically generated by *float* to include the service name and the endpoint port. @@ -2506,8 +2511,8 @@ tuples used for redirecting top-level domains to specific destinations service which is normally part of the log-collector infrastructure. As this is a large Java daemon with significant memory requirements, it is often useful to disable it for testing environments. Note that in -this case one should also import *services.yml.no-elasticsearch* -instead of the default *services.yml.default*. +this case one should import *services.core.yml* +instead of the default *services.default.yml*. `es_log_keep_days` is a dictionary that specifies the retention time for the various log types, in days. The default is `{ audit: 60, @@ -2533,6 +2538,10 @@ instances should scrape their targets (default 10s). `prometheus_lts_scrape_interval` sets how often the long-term Prometheus instances should scrape the primary ones (default 1m). +`prometheus_probe_scrape_interval` controls the default +scrape_interval setting for all blackbox probes, and it just defaults +to the value of prometheus_scrape_interval if unset. + `prometheus_external_targets` allows adding additional targets to Prometheus beyond those that are described by the service metadata. It is a list of entries with *name*, *targets* attributes. Optionally, you may specify a *scheme* @@ -2572,6 +2581,15 @@ to be notified about resolved alerts (default False). ### Third-party services +#### ACME + +Float's ACME certificate generation service does not require any +configuration, as it will automatically generate a Letsencrypt +account. It is possible, however, to tell it to use a specific account +by providing it with a private key: + +`acme_private_key` - ACME private key, in PEM format + #### Private Docker registry You can have float use a private Docker registry by providing it with @@ -2622,7 +2640,7 @@ server requires TLS `alertmanager_smtp_auth_username` and `alertmanager_smtp_auth_password` - credentials for authentication - + `alertmanager_smtp_hello` - hostname to use in the HELO SMTP header sent to the server (default *localhost*) @@ -2694,8 +2712,8 @@ unsupported. ```shell sudo apt install golang ansible -go get git.autistici.org/ale/x509ca -go get git.autistici.org/ale/ed25519gen +go install git.autistici.org/ale/x509ca@latest +go install git.autistici.org/ale/ed25519gen@latest export PATH=$PATH:$HOME/go/bin ``` @@ -3293,13 +3311,12 @@ process: * Set *float_debian_dist* to the new codename (e.g. "buster") in your group_vars/all configuration. * Run *float*, which will install the correct APT sources for the new - release. -* Run *apt dist-upgrade* manually or via Ansible. This part is not - automated yet due to the large variety in possible scenarios. -* Run *float* again: it will now detect that the distribution has - changed and reconfigure packages as needed. - + release and upgrade the servers. +* Reboot the servers into the new kernels. +If you want more control over this process (Debian upgrades have been +event-less for a while now, but it's not always been the case) you +can of course run the upgrade manually. # Example scenarios @@ -3326,7 +3343,7 @@ available) for the service. ```yaml include: - - "/path/to/float/services.yml.default" + - "/path/to/float/services.default.yml" ok: scheduling_group: backend num_instances: 1 @@ -3401,7 +3418,7 @@ The services.yml file: ```yaml include: - - "/path/to/float/services.yml.default" + - "/path/to/float/services.default.yml" videoconf: scheduling_group: videoconf num_instances: all diff --git a/float/docs/reference.pdf b/float/docs/reference.pdf Binary files differindex d6dcce0..c55f860 100644 --- a/float/docs/reference.pdf +++ b/float/docs/reference.pdf diff --git a/float/float b/float/float index f4a6fb2..d2727f3 100755 --- a/float/float +++ b/float/float @@ -51,7 +51,7 @@ Vagrant.configure(2) do |config| libvirt.memory = {{ ram }} libvirt.random_hostname = true libvirt.cpu_mode = 'host-passthrough' - libvirt.volume_cache = 'unsafe' + libvirt.disk_driver :cache => 'unsafe' {% if libvirt.remote_host %} libvirt.host = "{{ libvirt.remote_host }}" libvirt.username = "{{ libvirt.remote_user }}" @@ -120,7 +120,7 @@ include: - "{{ p | relpath(targetdir) }}" {% endfor %} {% else %} - - "{{ srcdir | relpath(targetdir) }}/services.yml.no-elasticsearch" + - "{{ srcdir | relpath(targetdir) }}/services.core.yml" {% endif %} ''', 'passwords.yml': '''--- @@ -185,7 +185,7 @@ DEFAULT_VARS = { 'nocows': 1, 'display_skipped_hosts': False, - 'callback_whitelist': 'float_ci', + 'callbacks_enabled': 'float_ci', 'stdout_callback': 'float_ci', 'host_key_checking': False, 'forks': 50, @@ -208,7 +208,7 @@ DEFAULT_VARS = { 'domain_public': [], 'testing': True, - 'float_debian_dist': 'bullseye', + 'float_debian_dist': 'bookworm', 'net_overlays': [{ 'name': 'vpn0', 'network': '192.168.13.0/24', @@ -441,7 +441,7 @@ def command_run(config, playbooks, print('Running playbook %s...' % (arg,)) - os.environ['LC_ALL'] = 'C' + os.environ['LC_ALL'] = 'C.UTF-8' _fix_ansible_vault_password_file() cmd = [os.getenv('ANSIBLE_PLAYBOOK', 'ansible-playbook'), '-i', config] diff --git a/float/playbooks/apt-upgrade.yml b/float/playbooks/apt-upgrade.yml index f2285d3..dc2a13c 100644 --- a/float/playbooks/apt-upgrade.yml +++ b/float/playbooks/apt-upgrade.yml @@ -1,4 +1,5 @@ --- + - hosts: all tasks: - copy: diff --git a/float/playbooks/frontend.yml b/float/playbooks/frontend.yml index 27fe8e8..85715f2 100644 --- a/float/playbooks/frontend.yml +++ b/float/playbooks/frontend.yml @@ -13,10 +13,10 @@ roles: - float-infra-dns -- hosts: admin_dashboard +- hosts: service_dashboard gather_facts: no roles: - - float-infra-admin-dashboard + - float-infra-service-dashboard - hosts: acme gather_facts: no diff --git a/float/playbooks/init-credentials.yml b/float/playbooks/init-credentials.yml index 555b0ba..6c02e0e 100644 --- a/float/playbooks/init-credentials.yml +++ b/float/playbooks/init-credentials.yml @@ -28,7 +28,6 @@ - dnssec - ssh - sso - - x509 # First of all, generate secrets from the passwords.yml file. - name: Initialize secrets @@ -50,8 +49,17 @@ - name: Generate SSO credentials local_action: ed25519 privkey="{{ credentials_dir }}/sso/secret.key" pubkey="{{ credentials_dir }}/sso/public.key" - - name: Generate global DH params - local_action: command openssl dhparam -out "{{ credentials_dir }}/x509/dhparam" "{{ dhparam_bits | default('2048') }}" creates="{{ credentials_dir }}/x509/dhparam" + - set_fact: + default_x509_ca_list: + - {tag: x509} + + - name: Create X509 CA directory + local_action: file path="{{ credentials_dir }}/{{ item.tag }}" state=directory + loop: "{{ x509_ca_list | default(default_x509_ca_list) }}" - name: Generate the X509 CA certificate - local_action: x509_ca ca_subject="{{ x509_ca_subject | default('CN=Service CA') }}" ca_cert_path="{{ credentials_dir }}/x509/ca.pem" ca_key_path="{{ credentials_dir }}/x509/ca_private_key.pem" + local_action: x509_ca ca_subject="{{ item.subject | default('CN=Service CA') }}" ca_cert_path="{{ credentials_dir }}/{{ item.tag }}/ca.pem" ca_key_path="{{ credentials_dir }}/{{ item.tag }}/ca_private_key.pem" + loop: "{{ x509_ca_list | default(default_x509_ca_list) }}" + + - name: Generate global DH params + local_action: command openssl dhparam -out "{{ credentials_dir }}/x509/dhparam" "{{ dhparam_bits | default('2048') }}" creates="{{ credentials_dir }}/x509/dhparam" diff --git a/float/plugins/action/float_authorized_keys.py b/float/plugins/action/float_authorized_keys.py new file mode 100644 index 0000000..f891d9c --- /dev/null +++ b/float/plugins/action/float_authorized_keys.py @@ -0,0 +1,27 @@ +# Prepare a SSH authorized_keys file content using float 'admins'. + +from ansible.plugins.action import ActionBase + + +class ActionModule(ActionBase): + + TRANSFERS_FILES = False + + def run(self, tmp=None, task_vars=None): + admins = self._templar.template('{{ admins }}') + authorized_keys = [] + + # For each SSH key, add a comment with the owner's username. + for entry in admins: + username = entry['name'] + if 'ssh_keys' not in entry: + continue + for key in entry['ssh_keys']: + key_without_comment = ' '.join(key.split()[:2]) + key_with_comment = f'{key_without_comment} {username}\n' + authorized_keys.append(key_with_comment) + + result = super(ActionModule, self).run(tmp, task_vars) + result['ansible_facts'] = {'float_authorized_keys': ''.join(authorized_keys)} + result['changed'] = False + return result diff --git a/float/plugins/action/float_container_options.py b/float/plugins/action/float_container_options.py new file mode 100644 index 0000000..664af57 --- /dev/null +++ b/float/plugins/action/float_container_options.py @@ -0,0 +1,108 @@ +from ansible.plugins.action import ActionBase + + +TMPFS_FLAGS = 'tmpfs-mode=01777' +DEFAULT_TMPFS_SIZE = '64M' + + +class ActionModule(ActionBase): + + TRANSFERS_FILES = False + + # Options to set the container environment. + def _environment_options(self, service, container): + service_name = service['name'] + hostname = self._templar.template('{{ inventory_hostname }}') + domain = self._templar.template('{{ domain }}') + + env = { + 'FLOAT_SERVICE': f'{service_name}.{domain}', + 'FLOAT_INSTANCE_NAME': f'{hostname}.{service_name}.{domain}', + 'FLOAT_CONTAINER_IMAGE': container['image'], + 'FLOAT_CONTAINER_NAME': f'{service_name}-{container["name"]}', + } + if 'env' in container: + env.update(container['env']) + + options = [] + for key, value in sorted(env.items()): + options.append(f'--env={key}={value}') + return options + + # Options for volumes (tmpfs, bind mounts). + def _mount_options(self, service, container): + options = [] + add_tmpfs = True + + def _bind(src, dst): + options.append(f'--mount=type=bind,source={src},destination={dst}') + + def _tmpfs(dst, flags=None): + opt = f'--mount=type=tmpfs,destination={dst},{TMPFS_FLAGS}' + if flags: + opt += f',{flags}' + options.append(opt) + + if container.get('readonly', True): + options.append('--read-only') + add_tmpfs = False + + for vol in container.get('volumes', []): + for src, dst in sorted(vol.items()): + if dst == '/tmp': + add_tmpfs = False + if src == 'tmpfs': + _tmpfs(dst, f'tmpfs-size={DEFAULT_TMPFS_SIZE}') + elif src.startswith('tmpfs/'): + sz = src[6:] + _tmpfs(dst, f'tmpfs-size={sz}') + else: + _bind(src, dst) + _tmpfs('/run', 'tmpfs-size=16M,exec=true,notmpcopyup') + _bind('/dev/log', '/dev/log') + _bind('/etc/credentials/system', '/etc/ssl/certs') + if add_tmpfs: + _tmpfs('/tmp', f'tmpfs-size={DEFAULT_TMPFS_SIZE},notmpcopyup') + + for creds in service.get('service_credentials', []): + creds_name = creds['name'] + ca_tag = creds.get('ca_tag', 'x509') + creds_path = f'/etc/credentials/{ca_tag}/{creds_name}' + _bind(creds_path, creds_path) + + return options + + # Network options (ports). + def _network_options(self, container): + options = ['--network=host'] + ports = [] + if 'ports' in container: + ports = container['ports'] + elif 'port' in container: + ports = [container['port']] + for port in sorted(ports): + options.append(f'--expose={port}') + return options + + def run(self, tmp=None, task_vars=None): + service = self._task.args['service'] + container = self._task.args['container'] + + options = [] + + options.extend(self._environment_options(service, container)) + options.extend(self._mount_options(service, container)) + options.extend(self._network_options(container)) + + is_root = container.get('root') + if container.get('drop_capabilities', not is_root): + options.append('--security-opt=no-new-privileges') + options.append('--cap-drop=all') + + if 'docker_options' in container: + options.extend(container['docker_options'].split()) + + result = super().run(tmp, task_vars) + result['options'] = options + result['changed'] = False + return result diff --git a/float/plugins/action/tinc_host_conf.py b/float/plugins/action/tinc_host_conf.py deleted file mode 100644 index 5b83883..0000000 --- a/float/plugins/action/tinc_host_conf.py +++ /dev/null @@ -1,73 +0,0 @@ -# Generate a host configuration file for tinc (fetching the public key -# from the remote host), and store the result in an Ansible fact. - -from ansible.plugins.action import ActionBase -from ansible.errors import AnsibleFileNotFound -from ansible.module_utils._text import to_text - - -HOST_TEMPLATE = ''' -{% for ip in ips %} -Address = {{ ip }} -{% endfor %} -Port = {{ tinc_config.port | default('655') }} -Cipher = {{ tinc_config.cipher | default('aes-128-cbc') }} -Digest = {{ tinc_config.digest | default('sha256') }} -Compression = {{ tinc_config.compression | default('0') }} -PMTU = {{ tinc_config.pmtu | default('1460') }} -Subnet = {{ tinc_host_subnet }} - -{{ tinc_host_public_key }} -''' - - -class ActionModule(ActionBase): - - TRANSFERS_FILES = False - - def _cmd(self, task_vars, args, creates=None): - args = { - '_raw_params': ' '.join(args), - 'creates': creates, - } - return self._execute_module( - module_name='command', - module_args=args, - task_vars=task_vars, - wrap_async=False) - - def run(self, tmp=None, task_vars=None): - overlay = self._task.args['overlay'] - subnet = self._templar.template('{{ ip_%s }}/32' % overlay) - - # Find the overlay configuration by scanning the 'net_overlays' - # configuration variable, which is a list - it would be simpler with - # a dictionary. - net_overlays = self._templar.template('{{ net_overlays|default([]) }}') - overlay_config = {'name': overlay} - for n in net_overlays: - if n['name'] == overlay: - overlay_config = n - break - - result = super(ActionModule, self).run(tmp, task_vars) - - # Fetch the host public key. - pubkey = self._cmd(task_vars, [ - '/bin/cat', '/etc/tinc/%s/rsa_key.pub' % overlay])['stdout'] - if not pubkey: - result['failed'] = True - result['msg'] = "could not fetch host public key" - return result - - # Generate the template, adding some custom variables of our own. - self._templar._available_variables['tinc_host_subnet'] = subnet - self._templar._available_variables['tinc_host_public_key'] = pubkey - self._templar._available_variables['tinc_config'] = overlay_config - data = self._templar.do_template(HOST_TEMPLATE, - preserve_trailing_newlines=True, - escape_backslashes=False) - - result['ansible_facts'] = {'tinc_host_config': data} - result['changed'] = False - return result diff --git a/float/plugins/inventory/float.py b/float/plugins/inventory/float.py index 808b52b..46c2b25 100644 --- a/float/plugins/inventory/float.py +++ b/float/plugins/inventory/float.py @@ -352,6 +352,14 @@ def _build_public_endpoints_map(services): return upstreams, endpoints +def _build_public_endpoint_port_map(services): + endpoints_by_port = {} + for svc in services.values(): + for pe in svc.get('public_endpoints', []): + endpoints_by_port[pe['port']] = pe['name'] + return endpoints_by_port + + # Build the map of upstreams for 'horizontal' (well-known etc) HTTP # public endpoints. # @@ -491,7 +499,9 @@ class Assignments(object): return str(self._fwd) @classmethod - def _available_hosts(cls, service, group_map): + def _available_hosts(cls, service, group_map, service_hosts_map): + if 'schedule_with' in service: + return service_hosts_map[service['schedule_with']] scheduling_groups = ['all'] if 'scheduling_group' in service: scheduling_groups = [service['scheduling_group']] @@ -499,6 +509,8 @@ class Assignments(object): scheduling_groups = service['scheduling_groups'] available_hosts = set() for g in scheduling_groups: + if g not in group_map: + raise Exception(f'The scheduling_group "{g}" is not defined in inventoy') available_hosts.update(group_map[g]) return list(available_hosts) @@ -518,13 +530,17 @@ class Assignments(object): # Iterations should happen over sorted items for reproducible # results. The sort function combines the 'scheduling_order' - # attribute (default -1) and the service name. + # attribute (default -1), the presence of the 'schedule_with' + # attribute, and the service name. def _sort_key(service_name): - return (services[service_name].get('scheduling_order', -1), service_name) + return (services[service_name].get('scheduling_order', -1), + 1 if 'schedule_with' in services[service_name] else 0, + service_name) for service_name in sorted(services.keys(), key=_sort_key): service = services[service_name] - available_hosts = cls._available_hosts(service, group_map) + available_hosts = cls._available_hosts(service, group_map, + service_hosts_map) num_instances = service.get('num_instances', 'all') if num_instances == 'all': service_hosts = sorted(available_hosts) @@ -611,6 +627,7 @@ def run_scheduler(config): # The following variables are just used for debugging purposes (dashboards). 'float_service_assignments': assignments._fwd, 'float_service_masters': assignments._masters, + 'float_http_endpoints_by_port': _build_public_endpoint_port_map(services), }) # Set the HTTP frontend configuration on the 'frontend' group. diff --git a/float/roles/float-base-backup-metadata/handlers/main.yml b/float/roles/float-base-backup-metadata/handlers/main.yml index 34aab05..68eff7a 100644 --- a/float/roles/float-base-backup-metadata/handlers/main.yml +++ b/float/roles/float-base-backup-metadata/handlers/main.yml @@ -4,3 +4,6 @@ systemd: name: tabacco-metadb.service state: restarted + # Allow failure when testing backups, the unit can't start until later. + ignore_errors: "{{ testing | default(True) }}" + diff --git a/float/roles/float-base-datasets/tasks/dataset_litestream.yml b/float/roles/float-base-datasets/tasks/dataset_litestream.yml index 7e31240..1e4a672 100644 --- a/float/roles/float-base-datasets/tasks/dataset_litestream.yml +++ b/float/roles/float-base-datasets/tasks/dataset_litestream.yml @@ -2,7 +2,6 @@ - set_fact: dataset_filename: "{{ dataset.filename }}" - dataset_replica_url: "{{ backup_litestream_url | default('') }}/{{ dataset_tag }}" dataset_replication_unit: "replicate-{{ dataset_tag }}.service" # Just don't backup at all if litestream is not configured. dataset_should_backup: "{{ dataset_should_backup and (backup_litestream_config is defined) }}" @@ -18,6 +17,10 @@ when: "dataset_should_backup and backup_litestream_config.get('type', 's3') == 's3'" - set_fact: + dataset_litestream_config: "{{ dataset_litestream_config | combine(dataset.litestream_params) }}" + when: "dataset_should_backup and (dataset.litestream_params is defined)" + +- set_fact: litestream_config: dbs: - path: "{{ dataset_path }}/{{ dataset_filename }}" diff --git a/float/roles/float-base-datasets/templates/litestream-restore-script.j2 b/float/roles/float-base-datasets/templates/litestream-restore-script.j2 index 4d0d28a..f37e36d 100644 --- a/float/roles/float-base-datasets/templates/litestream-restore-script.j2 +++ b/float/roles/float-base-datasets/templates/litestream-restore-script.j2 @@ -1,8 +1,11 @@ #!/bin/sh -{% if backup_litestream_url is defined %} +{% if backup_litestream_config is defined %} # Restore the dataset {{ dataset_name }} using litestream. +set -a +. /etc/litestream/{{ dataset_tag }}.env + /usr/bin/litestream restore --config=/etc/litestream/{{ dataset_tag }}.yml --if-replica-exists -v "{{ dataset_path }}/{{ dataset_filename }}" if [ $? -gt 0 ]; then diff --git a/float/roles/float-base-datasets/templates/restore-service.j2 b/float/roles/float-base-datasets/templates/restore-service.j2 index 07801ed..620f140 100644 --- a/float/roles/float-base-datasets/templates/restore-service.j2 +++ b/float/roles/float-base-datasets/templates/restore-service.j2 @@ -9,5 +9,8 @@ Type=oneshot RemainAfterExit=true ExecStart=/usr/lib/float/float-dataset-restore {{ dataset_tag }} +Restart=on-failure +RestartSec=10s + [Install] RequiredBy={{ required_by | join(' ') }} diff --git a/float/roles/float-base-docker/defaults/main.yml b/float/roles/float-base-docker/defaults/main.yml index 8b9cd78..641abd4 100644 --- a/float/roles/float-base-docker/defaults/main.yml +++ b/float/roles/float-base-docker/defaults/main.yml @@ -19,12 +19,5 @@ docker_daemon_config: docker_daemon_config_testing: insecure-registries: ["192.168.10.1:5000"] -# Where should we get the Podman packages from? Possible choices are -# 'ai' (a version pinned in our repository, tested working on Debian -# buster), 'debian' (use standard Debian packages, only available from -# bullseye) or 'kubic' (use the upstream Kubic repositories, with the -# latest release). The default is distribution-dependent. -podman_package_source: "{{ 'debian' if float_debian_dist == 'bullseye' else 'ai' }}" - # Provide a CA for Podman (to inject a caching registry proxy, for example). #podman_additional_ssl_ca: "{{ lookup('file', credentials_dir + '/podman_ca.pem') }}" diff --git a/float/roles/float-base-docker/files/in-container.sh b/float/roles/float-base-docker/files/in-container.sh new file mode 100644 index 0000000..998717b --- /dev/null +++ b/float/roles/float-base-docker/files/in-container.sh @@ -0,0 +1,13 @@ +if [ -z "${PS1-}" ]; then + return +fi + +_in_container_completion() { + local cur_word args + cur_word="${COMP_WORDS[COMP_CWORD]}" + args=$(podman ps --format '{{ .Names }}') + + COMPREPLY=($(compgen -W "$args" -- "$cur_word")) +} + +complete -F _in_container_completion in-container diff --git a/float/roles/float-base-docker/tasks/main.yml b/float/roles/float-base-docker/tasks/main.yml index 5fc73a5..998e612 100644 --- a/float/roles/float-base-docker/tasks/main.yml +++ b/float/roles/float-base-docker/tasks/main.yml @@ -30,10 +30,19 @@ - src: "in-container.j2" dst: "/usr/local/bin/in-container" +- name: Install docker-related files + copy: + src: "{{ item.src }}" + dest: "{{ item.dst }}" + mode: 0644 + loop: + - src: "in-container.sh" + dst: "/etc/profile.d/in-container.sh" + - name: Install docker cleanup cron job copy: dest: /etc/cron.d/docker-cleanup - content: "33 */3 * * * root runcron --quiet /usr/local/bin/docker-cleanup\n" + content: "33 3 * * * root runcron --quiet /usr/local/bin/docker-cleanup\n" mode: 0644 - import_tasks: start.yml diff --git a/float/roles/float-base-docker/tasks/podman.yml b/float/roles/float-base-docker/tasks/podman.yml index d925888..3168e9f 100644 --- a/float/roles/float-base-docker/tasks/podman.yml +++ b/float/roles/float-base-docker/tasks/podman.yml @@ -1,13 +1,34 @@ --- +# Pick a package source for Podman - defaults to using the stock +# Debian package since bullseye. Possible choices are 'ai' (a version +# pinned in our repository, tested working on Debian buster), 'debian' +# (use standard Debian packages, only available from bullseye) or +# 'kubic' (use the upstream Kubic repositories, with the latest +# release). +- set_fact: + podman_default_package_source: "{{ podman_default_package_source_by_distro[float_debian_dist] | default('debian') }}" +- set_fact: + podman_package_source: "{{ podman_default_package_source }}" + when: "podman_package_source is not defined" + - include_tasks: "podman_{{ podman_package_source }}.yml" +# Try to detect which podman version we're on based on source and +# distribution. +- set_fact: + podman_version: "{{ 3 if float_debian_dist in ('buster', 'bullseye') else 4 }}" + when: "podman_package_source == 'debian'" +- set_fact: + podman_version: 4 + when: "podman_package_source != 'debian'" + # If we've changed sources.list for podman, it is important # that we run apt upgrade now. - name: Apt upgrade apt: upgrade: 'yes' - when: "podman_sources_list.changed" + when: "podman_sources_list is defined and podman_sources_list.changed" - name: Symlink podman to docker file: diff --git a/float/roles/float-base-docker/tasks/start.yml b/float/roles/float-base-docker/tasks/start.yml index d788e19..cc8532c 100644 --- a/float/roles/float-base-docker/tasks/start.yml +++ b/float/roles/float-base-docker/tasks/start.yml @@ -4,21 +4,31 @@ file: path: /usr/lib/float/docker state: directory + owner: root + group: root + mode: "0750" + +- name: Configure container runner + float_container_options: + service: "{{ services[item.service] }}" + container: "{{ item.container }}" + loop: "{{ float_enabled_containers }}" + register: float_container_options - name: Create run scripts template: src: run.sh.j2 - dest: "/usr/lib/float/docker/run-{{ item.service }}-{{ item.container.name }}.sh" - mode: 0755 + dest: "/usr/lib/float/docker/run-{{ item.0.tag }}.sh" + mode: "0750" + owner: root + group: root + vars: + service_user: "{{ services[item.0.service].user }}" + container: "{{ item.0.container }}" + container_options: "{{ item.1.options }}" + tag: "{{ item.0.tag }}" register: docker_systemd_run_script - loop: "{{ float_enabled_containers }}" - -# TODO temporary only, to be removed once all symlinks are gone -- name: Cleanup Alias symlink - file: - dest: "/etc/systemd/system/{{ item.tag }}" - state: absent - loop: "{{ float_enabled_containers }}" + loop: "{{ float_enabled_containers | zip(float_container_options.results) }}" - name: Set up the systemd units template: diff --git a/float/roles/float-base-docker/templates/cleanup.sh.j2 b/float/roles/float-base-docker/templates/cleanup.sh.j2 index f60504c..81e4e9a 100644 --- a/float/roles/float-base-docker/templates/cleanup.sh.j2 +++ b/float/roles/float-base-docker/templates/cleanup.sh.j2 @@ -3,6 +3,6 @@ # Cleanup unused and unreferenced Docker images and containers. # -{{ container_runtime }} system prune --all --volumes --force +{{ container_runtime }} system prune --all --volumes --force --filter until=24h exit 0 diff --git a/float/roles/float-base-docker/templates/float-pull-image.j2 b/float/roles/float-base-docker/templates/float-pull-image.j2 index ebe86dd..606adc4 100755 --- a/float/roles/float-base-docker/templates/float-pull-image.j2 +++ b/float/roles/float-base-docker/templates/float-pull-image.j2 @@ -10,6 +10,8 @@ binary={{ container_runtime }} auth_file={{ docker_auth_file }} +manifest_mime_types="application/vnd.docker.distribution.manifest.v2+json,application/vnd.oci.image.manifest.v1+json" + # Automatically pick up proxy settings if present. [ -e /etc/containers/proxy-settings.sh ] && . /etc/containers/proxy-settings.sh @@ -19,11 +21,10 @@ get_main_auth_token() { get_auth_token() { local url="$1" - local auth_hdr="$(curl -s -I -H "Accept: application/vnd.docker.distribution.manifest.v2+json" "$url" \ + local auth_hdr="$(curl -s -I -H "Accept: ${manifest_mime_types}" "$url" \ | awk 'BEGIN{IGNORECASE=1} /^www-authenticate:/ {print $3}')" if [ -z "$auth_hdr" ]; then - echo "Could not obtain authentication token from $url" >&2 - exit 1 + return fi local scope=$(printf "%s" "${auth_hdr}" | sed -e 's/^.*scope="\([^"]*\)".*$/\1/') local service=$(printf "%s" "${auth_hdr}" | sed -e 's/^.*service="\([^"]*\)".*$/\1/') @@ -36,8 +37,8 @@ get_auth_token() { get_remote_image_version() { local url="https://${registry_hostname}/v2/${image_path}/manifests/${image_tag}" local token="$(get_auth_token "$url")" - curl -sf -H "Accept: application/vnd.docker.distribution.manifest.v2+json" \ - -H "Authorization: Bearer ${token}" \ + curl -sf -H "Accept: ${manifest_mime_types}" \ + ${token:+-H "Authorization: Bearer ${token}"} \ "$url" \ | jq -r .config.digest } diff --git a/float/roles/float-base-docker/templates/run.sh.j2 b/float/roles/float-base-docker/templates/run.sh.j2 index 4e08780..9b264d5 100644 --- a/float/roles/float-base-docker/templates/run.sh.j2 +++ b/float/roles/float-base-docker/templates/run.sh.j2 @@ -1,132 +1,43 @@ -#!/bin/sh -# Start the {{ item.service }}-{{ item.container.name }} container. +#!/bin/bash +# Start the {{ tag }} container. -{# The purpose of this file is to generate a nice, readable shell script, that - can be inspected on the resulting system. Due to the issues with whitespace - handling in Jinja, we programatically build an options list, and dump it - at once at the end - the resulting template looks a lot like code. -#} +opts=( {{ container_options | map('quote') | join(' ') }} ) -{# Define a global namespace, and a macro to add options to it #} -{%- set g = namespace( - options=[], - has_custom_tmp_volume=False) --%} -{%- macro opt(name, value=None) -%} -{% if value is not none -%} - {{ g.options.append('--%s=%s' % (name, value)) }} -{% else -%} - {{ g.options.append('--%s' % name) }} -{% endif -%} -{%- endmacro -%} -{%- if item.container.docker_options is defined -%} - {{ g.options.append(item.container.docker_options) }} -{%- endif -%} - -{{ opt('network', 'host') }} - -{# Environment variables #} -{% for k, v in item.container.get('env', {}) | dictsort %} - {{ opt('env', '%s=%s' % (k, v | quote)) }} -{% endfor %} - -{# Port configuration #} -{% if item.container.get('ports', []) %} - {% for port in item.container.get('ports',[]) %} - {{ opt('expose', port) }} - {% endfor %} -{% elif item.container.get('port') %} - {{ opt('expose', item.container.port) }} -{% endif %} - -{# Root read-only? #} -{% if item.container.get('readonly', True) %} - {{ opt('read-only') }} - {# podman will mount /run /tmp and /var/tmp with writable tmpfs when - the --read-only option is specified (see --read-only-tmpfs), so - do not add our own /tmp mount #} - {% set g.has_custom_tmp_volume = True %} -{% endif %} - -{# Define mountpoints / volumes. We treat /tmp specially as the container - configuration might override it. To detect if one of the mounts is a - /tmp mount, and propagate the result outside of the loop, we have to - use a special "jinja namespace" variable - see the discussion on - assignment scope at https://jinja.palletsprojects.com/en/2.11.x/templates/#assignments -#} -{% for mount in item.container.get('volumes', []) %} - {% for k, v in mount.items() %} - {% if v == '/tmp' %} - {% set g.has_custom_tmp_volume = True %} - {% endif %} - {% if k == 'tmpfs' -%} - {{ opt('mount', 'type=tmpfs,destination=%s,tmpfs-mode=01777,tmpfs-size=64M' % v) }} - {% else -%} - {{ opt('mount', 'type=bind,source=%s,destination=%s' % (k, v)) }} - {% endif %} - {% endfor %} -{% endfor %} - -{# System-level volumes that all containers have. - Include by default tmpfs mounts for standard Debian locations (/tmp, - /run), and a bind mount for the syslog socket in /dev/log. #} -{{ opt('mount', 'type=tmpfs,destination=/run,tmpfs-mode=01777,tmpfs-size=16M,noexec=false,notmpcopyup') }} -{% if not g.has_custom_tmp_volume %} - {{ opt('mount', 'type=tmpfs,destination=/tmp,tmpfs-mode=01777,tmpfs-size=64M,notmpcopyup') }} -{% endif %} -{{ opt('mount', 'type=bind,source=/dev/log,destination=/dev/log') }} -{{ opt('mount', 'type=bind,source=/etc/credentials/system,destination=/etc/ssl/certs') }} - -{# Mount the service credentials inside the container. #} -{% for creds in services[item.service].get('service_credentials', []) %} - {{ opt('mount', 'type=bind,source=/etc/credentials/x509/' + creds.name + ',destination=/etc/credentials/x509/' + creds.name) }} -{% endfor %} - -{# Security options (unless root=True) #} -{% if item.container.get('drop_capabilities', not item.container.get('root')) %} - {{ opt('security-opt', 'no-new-privileges') }} - {{ opt('cap-drop', 'all') }} -{% endif %} - -{# The following are options that are set at runtime, we can't use template machinery #} -opts= # Optionally mount the OpenCensus tracing config in the container. if [ -d /etc/tracing ]; then - opts="$opts --mount=type=bind,source=/etc/tracing,destination=/etc/tracing" + opts+=("--mount=type=bind,source=/etc/tracing,destination=/etc/tracing") fi -{% if not item.container.get('root') %} +{% if not container.get('root') %} # Run as unprivileged user. -container_uid=$(id -u {{ services[item.service].user }}) -container_gid=$(id -g {{ services[item.service].user }}) -opts="$opts --user=$container_uid:$container_gid" +container_uid=$(id -u {{ service_user }}) +container_gid=$(id -g {{ service_user }}) +opts+=("--user=$container_uid:$container_gid") # Add additional groups that the user is a member of. -for gid in $(id -G {{ services[item.service].user }}); do +for gid in $(id -G {{ service_user }}); do if [ $gid -ne $container_gid ]; then - opts="$opts --group-add=$gid" + opts+=("--group-add=$gid") fi done {% endif %} - -# TODO: move to --log-driver=passthrough once it is supported -# by the Podman version in Debian stable, and then add the -d -# option to get rid of the useless 'podman' process. {% if container_runtime == 'podman' %} exec /usr/bin/podman run \ +{% if float_debian_dist in ('buster', 'bullseye') %} + --log-driver=none \ +{% else %} + -d --log-driver=journald \ +{% endif %} --cgroups=disabled \ --replace \ --sdnotify=conmon \ {% elif container_runtime == 'docker' %} exec /usr/bin/systemd-docker --env run \ + --log-driver=none \ {% endif %} - --rm --name {{ item.service }}-{{ item.container.name }} \ + --rm --name {{ tag }} \ --pull=never \ - --log-driver=none \ --no-healthcheck \ - $opts \ -{% for opt in g.options %} - {{ opt }} \ -{% endfor %} + "${opts[@]}" \ "$@" \ - {{ item.container.image }} {{ item.container.get('args', '') }} + {{ container.image }} {{ container.get('args', '') }} diff --git a/float/roles/float-base-docker/templates/systemd.j2 b/float/roles/float-base-docker/templates/systemd.j2 index 1d45fc1..53d9aec 100644 --- a/float/roles/float-base-docker/templates/systemd.j2 +++ b/float/roles/float-base-docker/templates/systemd.j2 @@ -7,15 +7,25 @@ Requires=docker.service [Service] ExecStartPre=-rm -f %t/%N.cid +{# Since Podman 4, we can switch to a 'forking' model without the extra podman process + due to the support for --log-driver=passthrough. #} +{% if container_runtime == 'podman' and podman_version != '3' %} +Type=forking +ExecStart=/usr/lib/float/docker/run-{{ item.service }}-{{ item.container.name }}.sh --cidfile=%t/%N.cid --conmon-pidfile=%t/%N.pid +ExecStopPost=/usr/bin/{{ container_runtime }} rm -f -i --cidfile=%t/%N.cid +PIDFile=%t/%N.pid +{% else %} +Type=notify +NotifyAccess=all ExecStart=/usr/lib/float/docker/run-{{ item.service }}-{{ item.container.name }}.sh --cidfile=%t/%N.cid ExecStopPost=-/usr/bin/{{ container_runtime }} rm -f -i --cidfile=%t/%N.cid +{% endif %} ExecStopPost=-rm -f %t/%N.cid TimeoutStopSec=60 +TimeoutStartSec=240 KillMode=mixed Restart=always RestartSec=3s -Type=notify -NotifyAccess=all SyslogIdentifier={{ item.service }}-{{ item.container.name }} {% if item.container.resources is defined %} @@ -34,7 +44,7 @@ LimitNOFILE=65535 IPAddressDeny=any IPAddressAllow=localhost {# This is a terrible way to determine which private networks the host is on. - It would be a good candidate for pre-processing in the float plugin. #} + It would be a good candidate for pre-processing in the float plugin. #} {% for net_overlay in net_overlays | sort if ('ip_' + net_overlay.name) in hostvars[inventory_hostname] %} IPAddressAllow={{ net_overlay.network }} {% endfor %} diff --git a/float/roles/float-base-docker/vars/podman.yml b/float/roles/float-base-docker/vars/podman.yml new file mode 100644 index 0000000..4416871 --- /dev/null +++ b/float/roles/float-base-docker/vars/podman.yml @@ -0,0 +1,5 @@ +--- + +podman_default_package_source_by_distro: + stretch: ai + buster: ai diff --git a/float/roles/float-base-net-overlay/tasks/configure_netoverlay_tinc.yml b/float/roles/float-base-net-overlay/tasks/configure_netoverlay_tinc.yml index df0e0fc..c3b7746 100644 --- a/float/roles/float-base-net-overlay/tasks/configure_netoverlay_tinc.yml +++ b/float/roles/float-base-net-overlay/tasks/configure_netoverlay_tinc.yml @@ -3,7 +3,11 @@ - set_fact: tinc_net: "{{ item }}" tinc_dir: "/etc/tinc/{{ item }}" - tinc_netmask_cidr: 24 + tinc_host_ip: "{{ hostvars[inventory_hostname]['ip_' + item] }}" + net_overlay_config: "{{ net_overlays | selectattr('name', 'eq', item) | first }}" + +- set_fact: + tinc_host_ip_cidr: "{{ tinc_host_ip }}/{{ net_overlay_config.network | regex_replace('^.*/', '') }}" - name: Install the tinc package apt: @@ -18,11 +22,26 @@ args: creates: "{{ tinc_dir }}/rsa_key.pub" -- name: Generate tinc host configuration - tinc_host_conf: - overlay: "{{ tinc_net }}" +- name: Fetch tinc host public key + slurp: + src: "{{ tinc_dir }}/rsa_key.pub" + register: tinc_host_public_key check_mode: no +- name: Generate tinc host configuration + set_fact: + tinc_host_config: | + {% for ip in ips %} + Address = {{ ip }} + {% endfor %} + Port = {{ net_overlay_config.port | default('655') }} + Cipher = {{ net_overlay_config.cipher | default('aes-128-cbc') }} + Digest = {{ net_overlay_config.digest | default('sha256') }} + Compression = {{ net_overlay_config.compression | default('0') }} + PMTU = {{ net_overlay_config.pmtu | default('1460') }} + Subnet = {{ tinc_host_ip }}/32 + {{ tinc_host_public_key['content'] | b64decode }} + # Generate tinc host entries for all *other* hosts. Skip if for # some reason (failures) we weren't able to fetch it. - name: Install tinc host configuration diff --git a/float/roles/float-base-net-overlay/templates/firewall/11net-overlay-raw.j2 b/float/roles/float-base-net-overlay/templates/firewall/11net-overlay-raw.j2 index 4526343..b8fb718 100644 --- a/float/roles/float-base-net-overlay/templates/firewall/11net-overlay-raw.j2 +++ b/float/roles/float-base-net-overlay/templates/firewall/11net-overlay-raw.j2 @@ -1,8 +1,8 @@ {% macro allow_host_ips(h, chain) %} -{% for ip in hostvars[h]['ips'] | ansible.netcommon.ipv4 | sort %} +{% for ip in hostvars[h]['ips'] | ansible.utils.ipv4 | sort %} add_rule4 -A {{ chain }} -s {{ ip }} -j CT --notrack {% endfor %} -{% for ip in hostvars[h]['ips'] | ansible.netcommon.ipv6 | sort %} +{% for ip in hostvars[h]['ips'] | ansible.utils.ipv6 | sort %} add_rule6 -A {{ chain }} -s {{ ip }} -j CT --notrack {% endfor %} {% endmacro %} diff --git a/float/roles/float-base-net-overlay/templates/tinc/tinc-up.j2 b/float/roles/float-base-net-overlay/templates/tinc/tinc-up.j2 index 538bcc7..d973ec6 100644 --- a/float/roles/float-base-net-overlay/templates/tinc/tinc-up.j2 +++ b/float/roles/float-base-net-overlay/templates/tinc/tinc-up.j2 @@ -1,4 +1,4 @@ #!/bin/sh ip link set $INTERFACE up -ip addr add {{ hostvars[inventory_hostname]['ip_' + tinc_net] }}/{{ tinc_netmask_cidr }} dev $INTERFACE +ip addr add {{ tinc_host_ip_cidr }} dev $INTERFACE exit 0 diff --git a/float/roles/float-base-service-credentials/meta/main.yml b/float/roles/float-base-service-credentials/meta/main.yml index a60e6df..8da7fb7 100644 --- a/float/roles/float-base-service-credentials/meta/main.yml +++ b/float/roles/float-base-service-credentials/meta/main.yml @@ -4,4 +4,6 @@ dependencies: - role: float-util-credentials vars: credentials: "{{ float_host_service_credentials }}" + ca_tag: "x509" + diff --git a/float/roles/float-base/files/modprobe-hardening.conf b/float/roles/float-base/files/modprobe-hardening.conf new file mode 100644 index 0000000..c0cd23f --- /dev/null +++ b/float/roles/float-base/files/modprobe-hardening.conf @@ -0,0 +1,33 @@ +# Disable automatic conntrack helper assignment. +options nf_conntrack nf_conntrack_helper=0 + +# Obscure network protocols with a bad security track record. +install dccp /bin/false +install sctp /bin/false +install rds /bin/false +install tipc /bin/false +install n-hdlc /bin/false +install ax25 /bin/false +install netrom /bin/false +install x25 /bin/false +install rose /bin/false +install decnet /bin/false +install econet /bin/false +install af_802154 /bin/false +install ipx /bin/false +install appletalk /bin/false +install psnap /bin/false +install p8023 /bin/false +install p8022 /bin/false +install can /bin/false +install atm /bin/false + +# Obscure filesystems. +install cramfs /bin/false +install freevxfs /bin/false +install jffs2 /bin/false +install hfs /bin/false +install hfsplus /bin/false +install squashfs /bin/false +install udf /bin/false + diff --git a/float/roles/float-base/files/node-exporter-freeipmi.awk b/float/roles/float-base/files/node-exporter-freeipmi.awk new file mode 100644 index 0000000..a35e396 --- /dev/null +++ b/float/roles/float-base/files/node-exporter-freeipmi.awk @@ -0,0 +1,87 @@ +#!/bin/awk -f + +function export(values, name) { + if (values["metric_count"] < 1) { + return + } + delete values["metric_count"] + + printf("# HELP %s%s %s sensor reading from freeipmi\n", namespace, name, help[name]); + printf("# TYPE %s%s gauge\n", namespace, name); + for (sensor in values) { + printf("%s%s{sensor=\"%s\"} %f\n", namespace, name, sensor, values[sensor]); + } +} + +# Fields are Bar separated, with space padding. +BEGIN { + FS = "[ ]*[|][ ]*"; + namespace = "node_ipmi_"; + + # Friendly description of the type of sensor for HELP. + help["temperature_celsius"] = "Temperature"; + help["volts"] = "Voltage"; + help["amperes"] = "Current"; + help["power_watts"] = "Power"; + help["speed_rpm"] = "Fan"; + help["status"] = "Chassis status"; + + temperature_celsius["metric_count"] = 0; + volts["metric_count"] = 0; + amperes["metric_count"] = 0; + power_watts["metric_count"] = 0; + speed_rpm["metric_count"] = 0; + status["metric_count"] = 0; +} + +# Not a valid line. +{ + if (NF < 3) { + next + } +} + +# $4 is value field. +$4 ~ /N\/A/ { + next +} + +# $5 is units field. +$5 ~ /C/ { + temperature_celsius[$2] = $4; + temperature_celsius["metric_count"]++; +} + +$5 ~ /V/ { + volts[$2] = $4; + volts["metric_count"]++; +} + +$5 ~ /A/ { + amperes[$2] = $4; + amperes["metric_count"]++; +} + +$5 ~ /W/ { + power_watts[$2] = $4; + power_watts["metric_count"]++; +} + +$5 ~ /RPM/ { + speed_rpm[$2] = $4; + speed_rpm["metric_count"]++; +} + +$2 ~ /Chassis/ { + status[$2] = sprintf("%d", substr($4,3,2)); + status["metric_count"]++; +} + +END { + export(temperature_celsius, "temperature_celsius"); + export(volts, "volts"); + export(amperes, "amperes"); + export(power_watts, "power_watts"); + export(speed_rpm, "speed_rpm"); + export(status, "status"); +} diff --git a/float/roles/float-base/files/node-exporter-scripts/freeipmi.sh b/float/roles/float-base/files/node-exporter-scripts/freeipmi.sh new file mode 100644 index 0000000..2ad1f81 --- /dev/null +++ b/float/roles/float-base/files/node-exporter-scripts/freeipmi.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +ipmi-sensors | awk -f /usr/lib/float/node-exporter-freeipmi.awk diff --git a/float/roles/float-base/files/node-exporter-scripts/smartmon.py b/float/roles/float-base/files/node-exporter-scripts/smartmon.py index 3dd0c8f..8980e20 100644 --- a/float/roles/float-base/files/node-exporter-scripts/smartmon.py +++ b/float/roles/float-base/files/node-exporter-scripts/smartmon.py @@ -232,16 +232,13 @@ def device_smart_capabilities(device): (bool): True whenever SMART is available, False otherwise. (bool): True whenever SMART is enabled, False otherwise. """ - groups = device_info(device) - - state = { - g[1].split(' ', 1)[0] - for g in groups if g[0] == 'SMART support'} - - smart_available = 'Available' in state - smart_enabled = 'Enabled' in state - - return smart_available, smart_enabled + try: + subprocess.check_call( + ['/usr/sbin/smartctl', '--info'] + device.smartctl_select(), + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + return True, True + except subprocess.CalledProcessError: + return False, False def collect_device_info(device): diff --git a/float/roles/float-base/tasks/apt.yml b/float/roles/float-base/tasks/apt.yml index c592867..7923add 100644 --- a/float/roles/float-base/tasks/apt.yml +++ b/float/roles/float-base/tasks/apt.yml @@ -45,6 +45,15 @@ - "deb http://deb.autistici.org/urepo ai3/" - "deb http://deb.autistici.org/urepo buster-podman/" +- set_fact: + apt_debian_components: + - main + - contrib + - non-free +- set_fact: + apt_debian_components: "{{ apt_debian_components + ['non-free-firmware'] }}" + when: "float_debian_dist not in ('buster', 'bullseye')" + - name: Install our standard sources.list template: src: "sources.list.j2" @@ -59,6 +68,7 @@ - stretch - buster - bullseye + - bookworm - name: Run apt update apt: @@ -78,9 +88,9 @@ # When testing, try to make dpkg faster by disabling fsync. - name: Speed up dpkg - apt: - name: dpkg-eatmydata - state: present + copy: + dest: "/etc/dpkg/dpkg.cfg.d/no-sync" + content: "force-unsafe-io\n" when: "testing|default(True)" # Remove legacy stretch/buster mtail package pin. @@ -89,53 +99,63 @@ path: "/etc/apt/preferences.d/99float-syslog" state: absent -- name: Install base packages - apt: - name: "{{ packages }}" - state: present - vars: - packages: +- set_fact: + extra_packages: [] + ssh_packages: [] + base_packages: + # Standard Debian packages + - acpid + - auditd - ca-certificates - - unattended-upgrades - - systemd-coredump - - rsync + - curl - git + - gpg + - jq + - lsof + - mtail - ntp - openssl - - curl - - lsof + - prometheus-node-exporter + - prometheus-node-exporter-collectors + - rsync + - rsyslog + - rsyslog-exporter + - rsyslog-relp + - rsyslog-openssl + - systemd-coredump + - unattended-upgrades + - zstd + + # Custom packages + - assetmon + - audisp-json - cgroups-exporter + - firewall + - litestream - logcat - - tabacco - restic - - litestream - runcron - - acpid - - zstd - - man-db - - jq - - gpg - - firewall - - rsyslog - - rsyslog-relp - - rsyslog-exporter - - mtail - - auditd - - audisp-json - - prometheus-node-exporter - - prometheus-node-exporter-collectors - - assetmon + - tabacco -- name: Install extra packages - apt: - name: "{{ extra_packages }}" - state: present - vars: +- set_fact: + ssh_packages: + - ssh-key-wtmp + when: "enable_ssh and float_debian_dist != 'bullseye'" + +- set_fact: extra_packages: - net-tools - vim when: "not testing|default(True)" +- set_fact: + all_packages: "{{ base_packages + ssh_packages + extra_packages }}" + +- name: Install packages + apt: + name: "{{ all_packages }}" + state: present + - name: Remove blacklisted packages apt: name: "{{ packages }}" diff --git a/float/roles/float-base/tasks/harden.yml b/float/roles/float-base/tasks/harden.yml index 3202889..4dd4db9 100644 --- a/float/roles/float-base/tasks/harden.yml +++ b/float/roles/float-base/tasks/harden.yml @@ -103,3 +103,8 @@ - 'disable-kmod-load.service' ignore_errors: "{{ ansible_check_mode }}" +- name: Configure module options and blocklists + copy: + src: "modprobe-hardening.conf" + dest: "/etc/modprobe.d/security.conf" + diff --git a/float/roles/float-base/tasks/ipmi.yml b/float/roles/float-base/tasks/ipmi.yml index 6367e07..c530db2 100644 --- a/float/roles/float-base/tasks/ipmi.yml +++ b/float/roles/float-base/tasks/ipmi.yml @@ -1,15 +1,25 @@ --- -- name: Install ipmitool packages and dependency +- name: Install freeipmi packages and dependency apt: name: "{{ packages }}" state: present vars: packages: - - ipmitool - - gawk # prometheus-node-exporter-ipmitool-sensor dependency + - freeipmi-tools + - gawk # node-exporter-freeipmi.awk dependency -- name: Enable prometheus node-exporter ipmitool sensor +- name: Remove ipmitool, not used anymore + apt: + name: ipmitool + state: absent + +- name: Disable prometheus node-exporter ipmitool sensor systemd: name: prometheus-node-exporter-ipmitool-sensor.timer - state: started - enabled: yes + state: stopped + enabled: no + +- name: Install freeipmi node-exporter script + copy: + src: "node-exporter-freeipmi.awk" + dest: "/usr/lib/float/node-exporter-freeipmi.awk" diff --git a/float/roles/float-base/tasks/main.yml b/float/roles/float-base/tasks/main.yml index af7e332..41b65d5 100644 --- a/float/roles/float-base/tasks/main.yml +++ b/float/roles/float-base/tasks/main.yml @@ -40,6 +40,14 @@ - include_tasks: rollback_protection.yml when: "git_revision != 'none' and not testing|default(True)" +# Detect virtual machines / physical hardware. +- name: Detect virtual machine + slurp: + src: "/sys/class/dmi/id/sys_vendor" + register: slurp_sysfs_dmi_vendor +- set_fact: + float_is_vm: "{{ slurp_sysfs_dmi_vendor['content'] | b64decode == 'QEMU' }}" + # Create the /usr/lib/float and /var/lib/float directories for # internal scripts. - file: @@ -94,5 +102,7 @@ - include_tasks: ipmi.yml when: ipmi_device.stat.exists == true +- include_tasks: systemd.yml + # Finally run some cleanups. - import_tasks: cleanup.yml diff --git a/float/roles/float-base/tasks/rollback_protection.yml b/float/roles/float-base/tasks/rollback_protection.yml index 1d42d82..4de38a4 100644 --- a/float/roles/float-base/tasks/rollback_protection.yml +++ b/float/roles/float-base/tasks/rollback_protection.yml @@ -50,6 +50,7 @@ float with "-e rollback=true". when: "commit_guard_stat.stat.exists and commit_compare.rc != 0 and not skip_rollback_protection" -- copy: +- name: Update current git revision + copy: dest: /etc/.float-ansible-commit content: "{{ git_revision }}\n" diff --git a/float/roles/float-base/tasks/ssh.yml b/float/roles/float-base/tasks/ssh.yml index 8b0fbee..e19165d 100644 --- a/float/roles/float-base/tasks/ssh.yml +++ b/float/roles/float-base/tasks/ssh.yml @@ -70,11 +70,13 @@ path: /etc/ssh/authorized_keys state: directory +- float_authorized_keys: {} + # Configure root's authorized_keys with the admin keys. - name: Install admin public keys authorized_key: user: root - key: "{% if emergency_ssh_key %}{{ emergency_ssh_key }}\n{% endif %}{% for a in admins %}{% for k in a.get('ssh_keys', []) %}{{ k }}\n{% endfor %}{% endfor %}" + key: "{% if emergency_ssh_key %}{{ emergency_ssh_key }}\n{% endif %}{{ float_authorized_keys }}" path: /etc/ssh/authorized_keys/root manage_dir: no state: present diff --git a/float/roles/float-base/tasks/systemd.yml b/float/roles/float-base/tasks/systemd.yml new file mode 100644 index 0000000..896d5fd --- /dev/null +++ b/float/roles/float-base/tasks/systemd.yml @@ -0,0 +1,13 @@ +--- + +# Find the systemd units matching locally running services, and ensure +# that they are set to always restart. +- set_fact: + local_systemd_units: "{{ float_enabled_services | map('extract', services) | rejectattr('systemd_services', 'undefined') | map(attribute='systemd_services') | flatten | reject('search', '^docker-') }}" +- name: Fix systemd services to autorestart + include_role: + name: float-util-systemd-custom-snippet + vars: + systemd_unit: "{{ item }}" + fix_restart: true + loop: "{{ local_systemd_units }}" diff --git a/float/roles/float-base/templates/firewall/10float.j2 b/float/roles/float-base/templates/firewall/10float.j2 index e8888a4..48c8894 100644 --- a/float/roles/float-base/templates/firewall/10float.j2 +++ b/float/roles/float-base/templates/firewall/10float.j2 @@ -2,10 +2,10 @@ # specific sets of hosts. {% macro allow_host_ips(h, chain) %} -{% for ip in hostvars[h]['ips'] | ansible.netcommon.ipv4 | sort %} +{% for ip in hostvars[h]['ips'] | ansible.utils.ipv4 | sort %} add_rule4 -A {{ chain }} -s {{ ip }} -j ACCEPT {% endfor %} -{% for ip in hostvars[h]['ips'] | ansible.netcommon.ipv6 | sort %} +{% for ip in hostvars[h]['ips'] | ansible.utils.ipv6 | sort %} add_rule6 -A {{ chain }} -s {{ ip }} -j ACCEPT {% endfor %} {% endmacro %} diff --git a/float/roles/float-base/templates/rsyslog.conf.j2 b/float/roles/float-base/templates/rsyslog.conf.j2 index a9de870..e4b9063 100644 --- a/float/roles/float-base/templates/rsyslog.conf.j2 +++ b/float/roles/float-base/templates/rsyslog.conf.j2 @@ -1,5 +1,6 @@ global( maxMessageSize="64k" + defaultNetstreamDriver="ossl" ) module(load="imuxsock" @@ -30,7 +31,12 @@ ruleset(name="process_stats") { action( type="omprog" name="to_exporter" +{% if float_debian_dist in ('buster', 'bullseye') %} binary="/usr/bin/rsyslog_exporter -web.listen-address=:9106" +{% else %} + binary="/usr/bin/rsyslog_exporter -silent -web.listen-address=:9106" +{% endif %} + queue.type="linkedlist" queue.workerThreads="1" ) diff --git a/float/roles/float-base/templates/sources.list.j2 b/float/roles/float-base/templates/sources.list.j2 index 939e9ed..dc0a5c6 100644 --- a/float/roles/float-base/templates/sources.list.j2 +++ b/float/roles/float-base/templates/sources.list.j2 @@ -1,5 +1,5 @@ {% if apt_sources_list_override is defined %}{{ apt_sources_list_override }}{% else %} -deb http://deb.debian.org/debian {{ float_debian_dist }} main contrib non-free -deb http://deb.debian.org/debian {{ float_debian_dist }}-updates main contrib non-free -deb http://security.debian.org/debian-security {{ float_debian_dist }}-security main contrib non-free +deb http://deb.debian.org/debian {{ float_debian_dist }} {{ apt_debian_components | join(' ') }} +deb http://deb.debian.org/debian {{ float_debian_dist }}-updates {{ apt_debian_components | join(' ') }} +deb http://security.debian.org/debian-security {{ float_debian_dist }}-security {{ apt_debian_components | join(' ') }} {% endif %} diff --git a/float/roles/float-base/templates/ssh/sshd_config.j2 b/float/roles/float-base/templates/ssh/sshd_config.j2 index 73a5610..beae646 100644 --- a/float/roles/float-base/templates/ssh/sshd_config.j2 +++ b/float/roles/float-base/templates/ssh/sshd_config.j2 @@ -2,9 +2,7 @@ # See the sshd_config(5) manpage for details Port {{ ssh_port }} -#ListenAddress :: -#ListenAddress 0.0.0.0 -Protocol 2 +AddressFamily any # HostKeys for protocol version 2 {% for key_type in ssh_host_key_types %} @@ -12,11 +10,6 @@ HostKey /etc/ssh/ssh_host_{{ key_type }}_key HostCertificate /etc/ssh/ssh_host_{{ key_type }}_key-cert.pub {% endfor %} -# Ciphers and MACs -KexAlgorithms curve25519-sha256@libssh.org,diffie-hellman-group-exchange-sha256 -Ciphers chacha20-poly1305@openssh.com,aes256-gcm@openssh.com,aes128-gcm@openssh.com,aes256-ctr,aes192-ctr,aes128-ctr -MACs hmac-sha2-512-etm@openssh.com,hmac-sha2-256-etm@openssh.com,umac-128-etm@openssh.com,hmac-sha2-512,hmac-sha2-256,umac-128@openssh.com - # Logging. VERBOSE logs the fingerprint of keys used to login. SyslogFacility AUTH LogLevel VERBOSE @@ -24,18 +17,20 @@ LogLevel VERBOSE # Authentication: StrictModes yes AuthorizedKeysFile /etc/ssh/authorized_keys/%u -PermitRootLogin without-password +PermitRootLogin prohibit-password PermitEmptyPasswords no PubkeyAuthentication yes UsePAM yes +# Allow more attempts for people with many keys in their agent. +MaxAuthTries 10 + # Disable a bunch of features. IgnoreRhosts yes HostbasedAuthentication no ChallengeResponseAuthentication no PasswordAuthentication no UseDNS no -#IgnoreUserKnownHosts yes # Makes ansible faster? PrintMotd no @@ -49,7 +44,15 @@ ClientAliveInterval 120 #AcceptEnv LANG LC_* # Be restrictive on forwarding / proxying. + +# Disable agent forwarding for the clients' safety. AllowAgentForwarding no -AllowTcpForwarding no -X11Forwarding no -#PermitTunnel no + +# Reverse forwarding can lead to security issues due to manipulating +# the network perimeter. +AllowTcpForwarding local +AllowStreamLocalForwarding local +PermitListen none + +Subsystem sftp /usr/lib/openssh/sftp-server + diff --git a/float/roles/float-base/templates/sysctl.conf.j2 b/float/roles/float-base/templates/sysctl.conf.j2 index c28c31e..b06208e 100644 --- a/float/roles/float-base/templates/sysctl.conf.j2 +++ b/float/roles/float-base/templates/sysctl.conf.j2 @@ -49,6 +49,9 @@ net.netfilter.nf_conntrack_max={{ nf_conntrack_max }} # Restrict core dumps for SUID binaries. fs.suid_dumpable=0 +# Protect against time-wait assassination. +net.ipv4.tcp_rfc1337=1 + # Disable source routed packet acceptance. net.ipv4.conf.all.accept_source_route=0 net.ipv4.conf.default.accept_source_route=0 @@ -77,6 +80,9 @@ net.ipv4.conf.default.rp_filter=1 # https://lore.kernel.org/patchwork/patch/1034150 dev.tty.ldisc_autoload=0 +# Restrict userfaultfd() syscall to the CAP_SYS_PTRACE capability. +vm.unprivileged_userfaultfd=0 + # Additional protections for fifos, hardlinks, regular files, and symlinks # https://patchwork.kernel.org/patch/10244781 # slightly tightened up from the systemd default values of "1" for each @@ -114,9 +120,7 @@ kernel.sysrq=0 # (linux-hardened default) net.core.bpf_jit_harden=2 kernel.unprivileged_bpf_disabled=1 -{% endif %} -{% if not disable_restricted_sysctl %} # Disable unprivileged user namespaces # https://lwn.net/Articles/673597 # (linux-hardened default) diff --git a/float/roles/float-base/templates/vhostmap.prom.j2 b/float/roles/float-base/templates/vhostmap.prom.j2 index 4444450..08ea619 100644 --- a/float/roles/float-base/templates/vhostmap.prom.j2 +++ b/float/roles/float-base/templates/vhostmap.prom.j2 @@ -6,7 +6,7 @@ Skip public_endpoints with a path, to avoid duplication of entries. #} {% for service_name, service in services | dictsort %} -{% for ep in service.get('public_endpoints', []) %} +{% for ep in service.get('public_endpoints', []) if ep.get('path', '/') == '/' %} {% for systemd_service in service.get('systemd_services', []) %} {% for d in domain_public %} {% if ep.sharded | default(False) %} diff --git a/float/roles/float-base/vars/main.yml b/float/roles/float-base/vars/main.yml index 7a45a63..dc6941a 100644 --- a/float/roles/float-base/vars/main.yml +++ b/float/roles/float-base/vars/main.yml @@ -1,5 +1,5 @@ --- # Define the 'rollback' variable to bypass rollback protection. -skip_rollback_protection: "{{ rollback | default(False) | bool }}" +skip_rollback_protection: "{{ rollback | default(False) }}" diff --git a/float/roles/float-infra-acme/tasks/main.yml b/float/roles/float-infra-acme/tasks/main.yml index 07e922b..9c00041 100644 --- a/float/roles/float-infra-acme/tasks/main.yml +++ b/float/roles/float-infra-acme/tasks/main.yml @@ -31,7 +31,16 @@ state: directory owner: acmeserver group: acmeserver - mode: 0700 + mode: "0700" + +- name: Install ACME private key + copy: + content: "{{ acme_private_key }}\n" + dest: "/var/lib/acme/account.key" + owner: acmeserver + group: acmeserver + mode: "0600" + when: acme_private_key is defined - name: Add the acmeserver user to the public-credentials and acme-credentials group user: diff --git a/float/roles/float-infra-admin-dashboard/handlers/main.yml b/float/roles/float-infra-admin-dashboard/handlers/main.yml deleted file mode 100644 index 9cb836e..0000000 --- a/float/roles/float-infra-admin-dashboard/handlers/main.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- - -- name: reload admin-dashboard - systemd: - name: docker-admin-dashboard-http - state: restarted diff --git a/float/roles/float-infra-dns/defaults/main.yml b/float/roles/float-infra-dns/defaults/main.yml index a66e91e..7b7ddb5 100644 --- a/float/roles/float-infra-dns/defaults/main.yml +++ b/float/roles/float-infra-dns/defaults/main.yml @@ -4,3 +4,7 @@ # By default, this is the first public domain. mx_ns_domain: "{{ domain_public[0] }}" +# The default CAA record for all zones points to LE since that is what +# 'acmeserver' uses. +dns_caa_record: '0 issue "letsencrypt.org"' + diff --git a/float/roles/float-infra-dns/templates/bind/named.conf.options b/float/roles/float-infra-dns/templates/bind/named.conf.options index fb34501..b0ace7b 100644 --- a/float/roles/float-infra-dns/templates/bind/named.conf.options +++ b/float/roles/float-infra-dns/templates/bind/named.conf.options @@ -10,7 +10,7 @@ options { {% if float_limit_bind_to_known_interfaces | default(False) %} listen-on { 127.0.0.1; -{% for ip in ips | ansible.netcommon.ipv4 | sort %} +{% for ip in ips | ansible.utils.ipv4 | sort %} {{ ip }}; {% endfor %} {% for n in net_overlays | sort if ('ip_' + n.name) in hostvars[inventory_hostname] %} @@ -19,7 +19,7 @@ options { }; listen-on-v6 { ::1; -{% for ip in ips | ansible.netcommon.ipv6 | sort %} +{% for ip in ips | ansible.utils.ipv6 | sort %} {{ ip }}; {% endfor %} }; @@ -39,6 +39,12 @@ options { // Conform to RFC1035. auth-nxdomain no; + // Increase tcp-client limit from default, and prevent + // idle connections from hanging around. + tcp-clients 2000; + tcp-idle-timeout 50; + tcp-keepalive-timeout 50; + allow-transfer { none; }; allow-query { localhost; diff --git a/float/roles/float-infra-dns/templates/dns/infra.yml b/float/roles/float-infra-dns/templates/dns/infra.yml index b104d31..dce0d69 100644 --- a/float/roles/float-infra-dns/templates/dns/infra.yml +++ b/float/roles/float-infra-dns/templates/dns/infra.yml @@ -3,8 +3,8 @@ "@ns": _: {% for h in services['dns'].hosts | sort %} -{% set host_ip4 = hostvars[h]['public_ips'] | ansible.netcommon.ipv4 %} -{% set host_ip6 = hostvars[h]['public_ips'] | ansible.netcommon.ipv6 %} +{% set host_ip4 = hostvars[h]['public_ips'] | ansible.utils.ipv4 %} +{% set host_ip6 = hostvars[h]['public_ips'] | ansible.utils.ipv6 %} {% if host_ip4 %} - NS ns{{ loop.index }}.{{ mx_ns_domain }}. {% endif %} @@ -12,6 +12,9 @@ - NS ns{{ loop.index }}-v6.{{ mx_ns_domain }}. {% endif %} {% endfor %} +{% if dns_caa_record is defined %} + - "CAA {{ dns_caa_record | regex_replace('\"', '\\\"') }}" +{% endif %} "@base": EXTENDS: "@ns" @@ -32,8 +35,8 @@ # The explicit NS delegation for 'l' is necessary for dnssec-sign to work properly. l: {% for h in services['dns'].hosts | sort %} -{% set host_ip4 = hostvars[h]['public_ips'] | ansible.netcommon.ipv4 %} -{% set host_ip6 = hostvars[h]['public_ips'] | ansible.netcommon.ipv6 %} +{% set host_ip4 = hostvars[h]['public_ips'] | ansible.utils.ipv4 %} +{% set host_ip6 = hostvars[h]['public_ips'] | ansible.utils.ipv6 %} {% if host_ip4 %} - NS ns{{ loop.index }}.{{ mx_ns_domain }}. {% endif %} @@ -45,8 +48,8 @@ {% if d == mx_ns_domain %} {# Only generate the nameservers' A records on the chosen zone #} {% for h in services['dns'].hosts | sort %} -{% set host_ip4 = hostvars[h]['public_ips'] | ansible.netcommon.ipv4 %} -{% set host_ip6 = hostvars[h]['public_ips'] | ansible.netcommon.ipv6 %} +{% set host_ip4 = hostvars[h]['public_ips'] | ansible.utils.ipv4 %} +{% set host_ip6 = hostvars[h]['public_ips'] | ansible.utils.ipv6 %} {% if host_ip4 %} ns{{ loop.index }}: {{ host_ip4 | to_json }} {% endif %} diff --git a/float/roles/float-infra-dns/templates/zonetool.yml b/float/roles/float-infra-dns/templates/zonetool.yml index 4f6fcbd..b80c198 100644 --- a/float/roles/float-infra-dns/templates/zonetool.yml +++ b/float/roles/float-infra-dns/templates/zonetool.yml @@ -1,6 +1,6 @@ --- {% set all_ips = services['frontend'].hosts | map('extract', hostvars) | rejectattr('traffic', 'false') | map(attribute='public_ips') | reject('undefined') | flatten %} -FRONTENDS4: {{ all_ips | ansible.netcommon.ipv4 | list | to_json }} +FRONTENDS4: {{ all_ips | ansible.utils.ipv4 | list | to_json }} -FRONTENDS6: {{ all_ips | ansible.netcommon.ipv6 | list | to_json }} +FRONTENDS6: {{ all_ips | ansible.utils.ipv6 | list | to_json }} diff --git a/float/roles/float-infra-haproxy/templates/haproxy.cfg.j2 b/float/roles/float-infra-haproxy/templates/haproxy.cfg.j2 index ef3c340..ad75398 100644 --- a/float/roles/float-infra-haproxy/templates/haproxy.cfg.j2 +++ b/float/roles/float-infra-haproxy/templates/haproxy.cfg.j2 @@ -26,7 +26,9 @@ defaults frontend stats bind :::8404 mode http +{% if float_debian_dist in ('buster', 'bullseye') %} option http-use-htx +{% endif %} http-request use-service prometheus-exporter if { path /metrics } stats enable stats uri /stats diff --git a/float/roles/float-infra-log-collector/defaults/main.yml b/float/roles/float-infra-log-collector/defaults/main.yml index ff97a7f..46142d7 100644 --- a/float/roles/float-infra-log-collector/defaults/main.yml +++ b/float/roles/float-infra-log-collector/defaults/main.yml @@ -3,8 +3,6 @@ # Whether to set up elasticsearch/kibana at all. enable_elasticsearch: true -es_major_version: "6" - # This number is very low and only useful for the testing environment. es_heap_size: "166m" diff --git a/float/roles/float-infra-log-collector/tasks/main.yml b/float/roles/float-infra-log-collector/tasks/main.yml index c86da8d..e9bb796 100644 --- a/float/roles/float-infra-log-collector/tasks/main.yml +++ b/float/roles/float-infra-log-collector/tasks/main.yml @@ -49,5 +49,14 @@ template: src: "rsyslog-collector.conf.j2" dest: "/etc/rsyslog-collector.conf" + vars: + rsyslog_port: 6514 + rsyslog_exporter_port: 9105 + rsyslog_elasticsearch_host: "127.0.0.1" + rsyslog_elasticsearch_port: 9200 + rsyslog_tls_ca: "/etc/credentials/x509/log-collector/ca.pem" + rsyslog_tls_cert: "/etc/credentials/x509/log-collector/server/cert.pem" + rsyslog_tls_key: "/etc/credentials/x509/log-collector/server/private_key.pem" + rsyslog_tls_permittedpeer: "*.{{ domain }}" notify: "restart rsyslog-collector" diff --git a/float/roles/float-infra-log-collector/templates/rsyslog-collector.conf.j2 b/float/roles/float-infra-log-collector/templates/rsyslog-collector.conf.j2 index 9adc7e1..f96c854 100644 --- a/float/roles/float-infra-log-collector/templates/rsyslog-collector.conf.j2 +++ b/float/roles/float-infra-log-collector/templates/rsyslog-collector.conf.j2 @@ -1,6 +1,8 @@ global( maxMessageSize="64k" + workDirectory="/var/spool/rsyslog" + defaultNetstreamDriver="ossl" ) main_queue( @@ -24,7 +26,7 @@ ruleset(name="process_stats") { action( type="omprog" name="to_exporter" - binary="/usr/bin/rsyslog_exporter -web.listen-address=:9105" + binary="/usr/bin/rsyslog_exporter -web.listen-address=:{{ rsyslog_exporter_port }} -silent" queue.type="linkedlist" queue.workerThreads="1" ) @@ -165,8 +167,8 @@ ruleset(name="incoming"){ if ($syslogfacility-text == "auth" and $programname == "audit") then { # Structured audit logs go to a dedicated Elasticsearch index. action(type="omelasticsearch" - server="127.0.0.1" - serverport="9200" + server="{{ rsyslog_elasticsearch_host }}" + serverport="{{ rsyslog_elasticsearch_port }}" template="esTemplateAudit" searchIndex="esIndexAudit" searchType="_doc" @@ -179,7 +181,6 @@ ruleset(name="incoming"){ queue.mindequeuebatchsize="100" queue.mindequeuebatchsize.timeout="3000" queue.filename="es-audit" - queue.spoolDirectory="/var/spool/rsyslog" action.resumeretrycount="-1") } else { # Extension point for rules applying to structured logs. @@ -191,8 +192,8 @@ ruleset(name="incoming"){ # Normal structured log present in the default syslog flow. Send # straight to Elasticsearch, skipping the log normalization step. action(type="omelasticsearch" - server="127.0.0.1" - serverport="9200" + server="{{ rsyslog_elasticsearch_host }}" + serverport="{{ rsyslog_elasticsearch_port }}" template="esTemplateJSON" searchIndex="esIndex" searchType="_doc" @@ -205,7 +206,6 @@ ruleset(name="incoming"){ queue.mindequeuebatchsize="100" queue.mindequeuebatchsize.timeout="3000" queue.filename="es-structured" - queue.spoolDirectory="/var/spool/rsyslog" action.resumeretrycount="-1") } } else if ($syslogfacility-text == "local3") then { @@ -219,8 +219,8 @@ ruleset(name="incoming"){ set $!request = "/sso_login?"; } action(type="omelasticsearch" - server="127.0.0.1" - serverport="9200" + server="{{ rsyslog_elasticsearch_host }}" + serverport="{{ rsyslog_elasticsearch_port }}" template="esTemplateHTTP" searchIndex="esIndexHTTP" searchType="_doc" @@ -233,7 +233,6 @@ ruleset(name="incoming"){ queue.mindequeuebatchsize="100" queue.mindequeuebatchsize.timeout="3000" queue.filename="es-http" - queue.spoolDirectory="/var/spool/rsyslog" action.resumeretrycount="-1") } else { # Traditional syslog message. Run it through mmnormalize to @@ -268,8 +267,8 @@ ruleset(name="incoming"){ # valid and ES will refuse it. set $!ignore = "1"; action(type="omelasticsearch" - server="127.0.0.1" - serverport="9200" + server="{{ rsyslog_elasticsearch_host }}" + serverport="{{ rsyslog_elasticsearch_port }}" template="esTemplate" searchIndex="esIndex" searchType="_doc" @@ -282,7 +281,6 @@ ruleset(name="incoming"){ queue.mindequeuebatchsize="100" queue.mindequeuebatchsize.timeout="3000" queue.filename="es-default" - queue.spoolDirectory="/var/spool/rsyslog" action.resumeretrycount="-1") } {% endif %} @@ -295,14 +293,14 @@ module( input( type="imrelp" - port="6514" + port="{{ rsyslog_port }}" maxDataSize="64k" ruleset="incoming" tls="on" tls.compression="on" - tls.cacert="/etc/credentials/x509/log-collector/ca.pem" - tls.mycert="/etc/credentials/x509/log-collector/server/cert.pem" - tls.myprivkey="/etc/credentials/x509/log-collector/server/private_key.pem" - tls.permittedpeer="*.{{ domain }}" + tls.cacert="{{ rsyslog_tls_ca }}" + tls.mycert="{{ rsyslog_tls_cert }}" + tls.myprivkey="{{ rsyslog_tls_key }}" + tls.permittedpeer="{{ rsyslog_tls_permittedpeer }}" tls.authmode="certvalid" ) diff --git a/float/roles/float-infra-nginx/handlers/main.yml b/float/roles/float-infra-nginx/handlers/main.yml index b41c1ff..397098e 100644 --- a/float/roles/float-infra-nginx/handlers/main.yml +++ b/float/roles/float-infra-nginx/handlers/main.yml @@ -11,3 +11,9 @@ systemd: name: firewall.service state: restarted + +- name: reload mtail + systemd: + name: mtail.service + state: restarted + diff --git a/float/roles/float-infra-nginx/meta/main.yml b/float/roles/float-infra-nginx/meta/main.yml index e57ebd7..ccc8705 100644 --- a/float/roles/float-infra-nginx/meta/main.yml +++ b/float/roles/float-infra-nginx/meta/main.yml @@ -4,4 +4,4 @@ dependencies: - role: float-base-public-credentials vars: credentials_type: http - + - role: float-util-tor-exits-dataset diff --git a/float/roles/float-infra-nginx/tasks/nginx.yml b/float/roles/float-infra-nginx/tasks/nginx.yml index 33a801a..e30b833 100644 --- a/float/roles/float-infra-nginx/tasks/nginx.yml +++ b/float/roles/float-infra-nginx/tasks/nginx.yml @@ -8,6 +8,7 @@ packages: - sso-proxy - nginx-full + - libnginx-mod-http-headers-more-filter # SSO proxy setup. - name: Configure /etc/default/sso-proxy @@ -113,7 +114,7 @@ - "50-mod-http-upstream-fair.conf" - "50-mod-http-xslt-filter.conf" - "50-mod-mail.conf" - - "50-mod-stream.conf" + notify: reload nginx # Setup the HTTP router configuration. - name: Configure NGINX (upstreams) @@ -151,14 +152,14 @@ file: path: /var/www/html/__errors state: directory - when: "nginx_install_custom_error_pages | bool" + when: nginx_install_custom_error_pages - name: Copy custom error messages copy: src: "{{ item }}" dest: /var/www/html/__errors/ with_fileglob: "errors/*" - when: "nginx_install_custom_error_pages | bool" + when: nginx_install_custom_error_pages # Create the cache directory. - file: @@ -191,6 +192,7 @@ template: src: "nginx.mtail.j2" dest: "/etc/mtail/nginx.mtail" + notify: reload mtail # Misc cleanup of old files. - name: Remove obsolete files diff --git a/float/roles/float-infra-nginx/templates/config/accept.map b/float/roles/float-infra-nginx/templates/config/accept.map new file mode 100644 index 0000000..b552866 --- /dev/null +++ b/float/roles/float-infra-nginx/templates/config/accept.map @@ -0,0 +1,4 @@ +map $http_accept $http_accept_simplified { + default $http_accept; + ~text/html html; +} diff --git a/float/roles/float-infra-nginx/templates/config/conf.d/gzip.conf b/float/roles/float-infra-nginx/templates/config/conf.d/gzip.conf index d52e34c..87be0b7 100644 --- a/float/roles/float-infra-nginx/templates/config/conf.d/gzip.conf +++ b/float/roles/float-infra-nginx/templates/config/conf.d/gzip.conf @@ -18,5 +18,6 @@ gzip_types image/svg+xml image/x-icon text/css + text/javascript text/plain; diff --git a/float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf b/float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf index 68bbc39..7d31efc 100644 --- a/float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf +++ b/float/roles/float-infra-nginx/templates/config/conf.d/proxy.conf @@ -20,8 +20,9 @@ proxy_redirect off; proxy_http_version 1.1; # Set up a global cache. +include /etc/nginx/accept.map; proxy_cache_path /var/cache/nginx levels=1:2 keys_zone=global:{{ nginx_cache_keys_mem }} max_size={{ nginx_cache_fs_size }} inactive=1d use_temp_path=off; -proxy_cache_key "$scheme$host$request_uri$sent_http_content_language"; +proxy_cache_key "$scheme$host$request_uri$http_accept_simplified$sent_http_content_language"; proxy_no_cache $cookie_SSO $http_authorization; proxy_cache_methods GET HEAD; proxy_cache_valid 200 10m; diff --git a/float/roles/float-infra-nginx/templates/config/nginx.conf b/float/roles/float-infra-nginx/templates/config/nginx.conf index 15896af..8a7066e 100644 --- a/float/roles/float-infra-nginx/templates/config/nginx.conf +++ b/float/roles/float-infra-nginx/templates/config/nginx.conf @@ -1,3 +1,4 @@ +include /etc/nginx/modules-enabled/*.conf; worker_processes auto; worker_rlimit_nofile {{ nginx_worker_connections * 2 }}; diff --git a/float/roles/float-infra-nginx/templates/nginx.mtail.j2 b/float/roles/float-infra-nginx/templates/nginx.mtail.j2 index b7a292b..e8f414e 100644 --- a/float/roles/float-infra-nginx/templates/nginx.mtail.j2 +++ b/float/roles/float-infra-nginx/templates/nginx.mtail.j2 @@ -4,22 +4,25 @@ counter nginx_http_requests by host, vhost, method, code counter nginx_http_requests_cache by host, vhost, cache_status counter nginx_http_bytes by host, vhost, method, code counter nginx_http_bytes_cache by host, vhost, cache_status -counter nginx_http_requests_ms by le, host, vhost, method, code +counter nginx_http_requests_ms by le, host, vhost, method /(?P<hostname>[-0-9A-Za-z._:]+) nginx_access: (?P<vhost>[-0-9A-Za-z._:]+) \S+ (?P<remote_addr>[0-9a-f\.:]+) - - \[[^\]]+\] "(?P<request_method>[A-Z]+) (?P<request_uri>\S+) (?P<http_version>HTTP\/[0-9\.]+)" (?P<status>\d{3}) ((?P<response_size>\d+)|-) "[^"]*" "[^"]*" ([-0-9A-Za-z._:]+) ((?P<ups_resp_seconds>\d+\.\d+)|-) (?P<request_seconds>\d+)\.(?P<request_milliseconds>\d+) (?P<cache_status>\S+)/ { nginx_http_request_total++ nginx_http_requests[$hostname][$vhost][$request_method][$status]++ - nginx_http_requests_cache[$hostname][$vhost][$cache_status]++ nginx_http_bytes[$hostname][$vhost][$request_method][$status] += $response_size - nginx_http_bytes_cache[$hostname][$vhost][$cache_status] += $response_size + + int($status) == 200 { + nginx_http_requests_cache[$hostname][$vhost][$cache_status]++ + nginx_http_bytes_cache[$hostname][$vhost][$cache_status] += $response_size {# 10ms-5s buckets, with factor=sqrt(2) #} {% for bucket_ms in [10, 14, 20, 28, 40, 57, 80, 113, 160, 226, 320, 453, 640, 905, 1280, 1810, 2560, 3620, 5119] %} - $request_seconds * 1000 + $request_milliseconds < {{ bucket_ms }} { - nginx_http_requests_ms["{{ bucket_ms }}"][$hostname][$vhost][$request_method][$status]++ - } + $request_seconds * 1000 + $request_milliseconds < {{ bucket_ms }} { + nginx_http_requests_ms["{{ bucket_ms }}"][$hostname][$vhost][$request_method]++ + } {% endfor %} - nginx_http_requests_ms["inf"][$hostname][$vhost][$request_method][$status]++ + nginx_http_requests_ms["inf"][$hostname][$vhost][$request_method]++ + } } diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/dns.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/dns.json index 808c733..9c53f9e 100644 --- a/float/roles/float-infra-prometheus/templates/grafana/dashboards/dns.json +++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/dns.json @@ -3,21 +3,29 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "description": "Bind9 DNS Service Statistics.", "editable": true, + "fiscalYearStartMonth": 0, "gnetId": 12309, - "graphTooltip": 0, - "id": 27, - "iteration": 1618527684190, + "graphTooltip": 1, "links": [ { "icon": "external link", @@ -28,1646 +36,472 @@ "url": "https://github.com/pecastro/grafana-dashboards/blob/master/prometheus/bind9-exporter-dns.json" } ], + "liveNow": false, "panels": [ { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "id": 19, - "panels": [], - "repeat": null, - "title": "System", - "type": "row" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 1, "fieldConfig": { "defaults": { - "custom": {} + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, "gridPos": { - "h": 4, - "w": 6, + "h": 8, + "w": 12, "x": 0, - "y": 1 - }, - "height": "150", - "id": 1, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "s ago", - "postfixFontSize": "80%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "time() - max(bind_boot_time_seconds{instance=~\"$instance\"}) ", - "interval": "5m", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600, - "target": "" - } - ], - "thresholds": "", - "title": "Restarted", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": "${DS_PROMETHEUS}", - "decimals": 1, - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "format": "s", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 1 + "y": 0 }, - "height": "150px", "id": 2, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "s ago", - "postfixFontSize": "80%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "time() - max(bind_config_time_seconds{instance=~\"$instance\"})", - "interval": "5m", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 600, - "target": "" - } - ], - "thresholds": "", - "title": "Reconfigured", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {}, - "links": [] - }, - "overrides": [] - }, - "fill": 3, - "fillGradient": 0, - "gridPos": { - "h": 4, - "w": 12, - "x": 12, - "y": 1 - }, - "hiddenSeries": false, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 3, - "links": [], - "nullPointMode": "null", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "increase(process_cpu_seconds_total{instance=~\"$instance\", job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Named CPU Time", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "tooltip": { + "mode": "single", + "sort": "none" } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {}, - "links": [] - }, - "overrides": [] }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 5 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Max File Descriptors", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { - "expr": "process_max_fds{instance=~\"$instance\",job=\"$job\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Max", - "refId": "A", - "step": 10, - "target": "" - }, - { - "expr": "process_open_fds{instance=~\"$instance\",job=\"$job\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Open", - "refId": "B", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "File Descriptors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 32, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(bind_incoming_queries_total{host=~\"$host\"}[$__rate_interval])) by ()", + "legendFormat": "qps", + "range": true, + "refId": "A" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Queries", + "type": "timeseries" }, { - "aliasColors": { - "Resident": "#890F02", - "Virtual": "#0A437C", - "Virtual Memory": "#0A437C" + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { - "custom": {}, - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, - "fill": 2, - "fillGradient": 0, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 12, - "y": 5 - }, - "hiddenSeries": false, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "y": 0 }, - "lines": true, - "linewidth": 3, - "links": [], - "nullPointMode": "null", + "id": 3, "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "process_virtual_memory_bytes{instance=~\"$instance\",job=\"$job\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Virtual", - "refId": "A", - "step": 10, - "target": "" - }, - { - "expr": "process_resident_memory_bytes{instance=~\"$instance\",job=\"$job\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Resident", - "refId": "B", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "tooltip": { + "mode": "single", + "sort": "none" } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {}, - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 12 - }, - "hiddenSeries": false, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { - "expr": "increase(bind_query_duplicates_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Duplicates", - "refId": "A", - "step": 4, - "target": "" - }, - { - "expr": "increase(bind_query_errors_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ error }}", - "refId": "B", - "step": 4, - "target": "" - }, - { - "expr": "increase(bind_query_recursions_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Recursions", - "refId": "C", - "step": 4, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Queries", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(bind_incoming_queries_total{host=~\"$host\"}[$__rate_interval])) by (host)", + "legendFormat": "__auto", + "range": true, + "refId": "A" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Queries (by host)", + "type": "timeseries" }, { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 19 + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "id": 21, - "panels": [], - "repeat": null, - "title": "Incoming", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { - "custom": {}, - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 0, - "y": 20 - }, - "hiddenSeries": false, - "id": 6, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - {} - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "irate(bind_incoming_queries_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ type }}", - "refId": "A", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Incoming Queries", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "decimals": -1, - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {}, - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 20 + "y": 8 }, - "hiddenSeries": false, - "id": 7, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", + "id": 4, "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "irate(bind_incoming_requests_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ opcode }}", - "refId": "A", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Incoming Request Opcodes", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "tooltip": { + "mode": "single", + "sort": "none" } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {}, - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 27 }, - "hiddenSeries": false, - "id": 8, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, "targets": [ { - "expr": "irate(bind_responses_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ result }}", - "refId": "A", - "step": 4, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Response Results", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(bind_resolver_queries_total{host=~\"$host\",view=~\"$view\"}[$__rate_interval])) by (host, view)", + "legendFormat": "{{host}}/{{view}}", + "range": true, + "refId": "A" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Resolver Queries", + "type": "timeseries" }, { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 34 + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "id": 23, - "panels": [], - "repeat": null, - "title": "Resolver", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { - "custom": {}, - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 35 - }, - "hiddenSeries": false, - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "h": 8, + "w": 12, + "x": 12, + "y": 8 }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", + "id": 5, "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "irate(bind_resolver_response_errors_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ view }} / {{ error }}", - "refId": "A", - "step": 4, - "target": "" + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - { - "expr": "irate(bind_resolver_response_lame_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ view }} / LAME", - "refId": "B", - "step": 4, - "target": "" - }, - { - "expr": "irate(bind_resolver_response_mismatch_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ view }} / MISMATCH", - "refId": "C", - "step": 4, - "target": "" - }, - { - "expr": "irate(bind_resolver_response_truncated_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ view }} / TRUNCATED", - "refId": "D", - "step": 4, - "target": "" + "tooltip": { + "mode": "single", + "sort": "none" } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Resolver Response Errors", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {}, - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 42 - }, - "hiddenSeries": false, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, "targets": [ { - "expr": "irate(bind_resolver_queries_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ view }} / {{ type }}", - "refId": "A", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Resolver Queries", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(bind_incoming_queries_total{host=~\"$host\"}[$__rate_interval])) by (host) - sum(rate(bind_resolver_queries_total{host=~\"$host\"}[$__rate_interval])) by (host)", + "legendFormat": "__auto", + "range": true, + "refId": "A" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Authoritative Queries (by host)", + "type": "timeseries" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {}, - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 42 - }, - "hiddenSeries": false, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "irate(bind_resolver_query_errors_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ view }} / {{ error }}", - "refId": "A", - "step": 10, - "target": "" - }, - { - "expr": "irate(bind_resolver_query_edns0_errors_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ view }} / EDNS0", - "refId": "B", - "step": 10, - "target": "" - }, - { - "expr": "irate(bind_resolver_query_retries_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ view }} / Retry", - "refId": "C", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Query Errors", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {}, - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 42 - }, - "hiddenSeries": false, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "irate(bind_resolver_query_duration_seconds_bucket{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ view }} / {{ le }}", - "refId": "A", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Query By Duration", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", "fieldConfig": { "defaults": { - "custom": {}, - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 0, - "y": 49 - }, - "hiddenSeries": false, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "y": 16 }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", + "id": 6, "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "bind_resolver_cache_rrsets{instance=~\"$instance\",job=\"$job\"}", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ view }} / {{ type }}", - "refId": "A", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Resolver Cache RR Sets", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "tooltip": { + "mode": "single", + "sort": "none" } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_PROMETHEUS}", - "fieldConfig": { - "defaults": { - "custom": {}, - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 49 - }, - "hiddenSeries": false, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, "targets": [ { - "expr": "irate(bind_resolver_dnssec_validation_errors_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ view }} / ValErr", - "refId": "A", - "step": 10, - "target": "" - }, - { - "expr": "irate(bind_resolver_dnssec_validation_success_total{instance=~\"$instance\",job=\"$job\"}[120s])", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ view }} / {{ result }}", - "refId": "B", - "step": 10, - "target": "" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "DNSSEC Validation", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(rate(bind_query_errors_total{host=~\"$host\"}[$__rate_interval])) by (host,error) / ignoring(error) group_left sum(rate(bind_incoming_queries_total{host=~\"$host\"}[$__rate_interval])) by (host)", + "legendFormat": "{{host}}/{{error}}", + "range": true, + "refId": "A" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Query Error Ratio", + "type": "timeseries" } ], "refresh": "10s", - "schemaVersion": 27, + "schemaVersion": 37, "style": "dark", "tags": [ "bind", @@ -1683,8 +517,6 @@ "text": "localhost", "value": "localhost" }, - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "datasource", @@ -1699,16 +531,46 @@ "type": "datasource" }, { - "allValue": null, "current": { - "selected": false, - "text": "frontend", - "value": "frontend" + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(bind_up, host)", + "hide": 0, + "includeAll": true, + "label": "Host:", + "multi": false, + "name": "host", + "options": [], + "query": { + "query": "label_values(bind_up, host)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "dns_9119", + "value": "dns_9119" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "datasource": "${DS_PROMETHEUS}", "definition": "label_values(bind_up, job)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Job", @@ -1724,41 +586,35 @@ "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { - "allValue": null, "current": { "selected": true, - "text": "latitanza.frontend.investici.org:9119", - "value": "latitanza.frontend.investici.org:9119" + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, - "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(bind_up, instance)", - "description": null, - "error": null, + "definition": "label_values(bind_resolver_cache_rrsets, view)", "hide": 0, "includeAll": true, - "label": "Host:", "multi": false, - "name": "instance", + "name": "view", "options": [], "query": { - "query": "label_values(bind_up, instance)", + "query": "label_values(bind_resolver_cache_rrsets, view)", "refId": "StandardVariableQuery" }, "refresh": 1, "regex": "", "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false + "sort": 0, + "type": "query" } ] }, @@ -1794,5 +650,6 @@ "timezone": "browser", "title": "DNS", "uid": "XTqyUORMz", - "version": 2 + "version": 3, + "weekStart": "" }
\ No newline at end of file diff --git a/float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json b/float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json index 7a98261..e73a59c 100644 --- a/float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json +++ b/float/roles/float-infra-prometheus/templates/grafana/dashboards/host.json @@ -3,19 +3,27 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "editable": true, - "gnetId": null, + "fiscalYearStartMonth": 0, "graphTooltip": 1, - "iteration": 1622901747496, "links": [ { "asDropdown": true, @@ -30,6 +38,7 @@ "type": "dashboards" } ], + "liveNow": false, "panels": [ { "alerting": {}, @@ -40,15 +49,14 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "localhost", + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "decimals": 0, "description": "See $cluster", "editable": true, "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "grid": {}, @@ -84,7 +92,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "9.3.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -94,6 +102,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "100 - (avg by (cpu) (irate(node_cpu_seconds_total{mode=\"idle\", host=~\"$server\"}[$__rate_interval])) * 100)", "format": "time_series", "hide": true, @@ -103,6 +115,10 @@ "step": 200 }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "sum by (mode) (irate(node_cpu_seconds_total{mode!=\"idle\",host=~\"$server\"}[$__rate_interval])) / scalar(count(node_cpu_seconds_total{mode=\"idle\",host=~\"$server\"}))", "format": "time_series", "hide": false, @@ -113,9 +129,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "CPU: utilization", "tooltip": { "msResolution": false, @@ -125,9 +139,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -142,16 +154,12 @@ }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -163,14 +171,13 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "localhost", + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "description": "", "editable": true, "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 0, "fillGradient": 0, "grid": {}, @@ -199,7 +206,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "9.3.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -222,6 +229,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_load1{host=~\"$server\"}", "format": "time_series", "intervalFactor": 4, @@ -231,6 +242,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_load5{host=~\"$server\"}", "format": "time_series", "intervalFactor": 4, @@ -240,6 +255,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_load15{host=~\"$server\"}", "format": "time_series", "intervalFactor": 4, @@ -249,6 +268,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "sum(count(node_cpu_seconds_total{job=\"node\",mode=\"idle\",host=~\"$server\"}) by (cpu))", "format": "time_series", "intervalFactor": 4, @@ -258,6 +281,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "sum(count(node_cpu_seconds_total{job=\"node\",mode=\"idle\",host=~\"$server\"}) by (cpu))/2", "format": "time_series", "hide": true, @@ -269,9 +296,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "CPU: saturation (load avg)", "tooltip": { "msResolution": false, @@ -281,9 +306,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -292,22 +315,17 @@ "format": "short", "label": "", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -322,13 +340,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "localhost", + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "editable": true, "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "grid": {}, @@ -361,7 +378,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "9.3.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -377,6 +394,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_memory_MemTotal_bytes{host=~\"$server\"}", "format": "time_series", "intervalFactor": 2, @@ -387,6 +408,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_memory_Cached_bytes{host=~\"$server\"}", "format": "time_series", "hide": false, @@ -398,6 +423,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_memory_MemTotal_bytes{host=~\"$server\"} - node_memory_Writeback_bytes{host=~\"$server\"} - node_memory_Cached_bytes{host=~\"$server\"} - node_memory_Buffers_bytes{host=~\"$server\"} - node_memory_MemFree_bytes{host=~\"$server\"}", "format": "time_series", "hide": false, @@ -409,6 +438,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_memory_MemFree_bytes{host=~\"$server\"}", "format": "time_series", "hide": true, @@ -421,9 +454,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory: utilization", "tooltip": { "msResolution": false, @@ -433,33 +464,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -475,13 +498,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "localhost", + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "editable": true, "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "grid": {}, @@ -514,7 +536,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "9.3.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -530,6 +552,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_memory_MemTotal_bytes{host=~\"$server\"}", "format": "time_series", "hide": true, @@ -541,6 +567,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "1 - (node_memory_SwapFree_bytes{host=~\"$server\"} / node_memory_SwapTotal_bytes{host=~\"$server\"})", "format": "time_series", "hide": true, @@ -552,6 +582,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_memory_Dirty_bytes{host=~\"$server\"}", "format": "time_series", "hide": true, @@ -563,6 +597,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_memory_WritebackTmp_bytes{host=~\"$server\"}", "format": "time_series", "hide": true, @@ -574,6 +612,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_vmstat_pswpin{host=~\"$server\"}[$__rate_interval])", "format": "time_series", "hide": false, @@ -585,6 +627,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_memory_Writeback_bytes{host=~\"$server\"}", "format": "time_series", "hide": true, @@ -596,6 +642,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_vmstat_pswpout{host=~\"$server\"}[$__rate_interval])", "format": "time_series", "hide": false, @@ -608,9 +658,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory: saturation", "tooltip": { "msResolution": false, @@ -620,24 +668,19 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "hertz", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "percentunit", - "label": null, "logBase": 1, "max": "1", "min": "0", @@ -645,8 +688,7 @@ } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -660,13 +702,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "localhost", + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "editable": true, "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 0, "fillGradient": 0, "grid": {}, @@ -695,7 +736,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "9.3.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -710,6 +751,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "8*sum(irate(node_network_receive_bytes_total{host=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[$__rate_interval]) > 0)", "format": "time_series", "hide": false, @@ -720,6 +765,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "8*sum(irate(node_network_transmit_bytes_total{host=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[$__rate_interval]) > 0)", "format": "time_series", "hide": false, @@ -730,6 +779,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "8*sum(irate(node_network_transmit_packets_total{host=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[$__rate_interval]) > 0)", "format": "time_series", "hide": true, @@ -741,6 +794,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "8*sum(irate(node_network_receive_packets_total{host=~\"$server.*\",device=~\"(e.+)[0-9]*\"}[$__rate_interval]) > 0)", "format": "time_series", "hide": true, @@ -752,9 +809,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network: utilization", "tooltip": { "msResolution": false, @@ -764,18 +819,14 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bps", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, @@ -783,14 +834,12 @@ "format": "pps", "label": "", "logBase": 32, - "max": null, "min": "0", "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -799,13 +848,12 @@ "bars": true, "dashLength": 10, "dashes": false, - "datasource": "localhost", + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "editable": true, "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "grid": {}, @@ -834,7 +882,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "9.3.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -844,6 +892,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "irate(node_network_transmit_drop_total{host=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[$__rate_interval]) + irate(node_network_receive_drop_total{host=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[$__rate_interval])", "format": "time_series", "hide": false, @@ -855,6 +907,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "irate(node_network_transmit_errs_total{host=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[$__rate_interval]) + irate(node_network_receive_errs_total{host=~\"$server.*\",device=~\"(eth|en[0-9a-z]+)[0-9]*\"}[$__rate_interval])", "format": "time_series", "hide": false, @@ -867,9 +923,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network: errors", "tooltip": { "msResolution": false, @@ -879,100 +933,141 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "pps", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "none", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "alerting": {}, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "localhost", - "decimals": 0, - "editable": true, - "error": false, + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "{host=\"172.17.0.1:9100\"}" + }, + "properties": [ + { + "id": "unit", + "value": "ms" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/time/" + }, + "properties": [ + { + "id": "unit", + "value": "ms" + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, - "grid": {}, "gridPos": { "h": 7, "w": 12, "x": 0, "y": 21 }, - "hiddenSeries": false, "id": 6, - "legend": { - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, "links": [], - "nullPointMode": "connected", "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.7", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "read", - "yaxis": 1 - }, - { - "alias": "{host=\"172.17.0.1:9100\"}", - "yaxis": 2 - }, - { - "alias": "/time/", - "yaxis": 2 + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + }, + "pluginVersion": "9.3.2", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "irate(node_disk_reads_completed_total{host=~\"$server.*\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])", "format": "time_series", "hide": true, @@ -983,6 +1078,10 @@ "step": 1200 }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "irate(node_disk_writes_completed_total{host=~\"$server.*\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])", "format": "time_series", "hide": true, @@ -993,6 +1092,10 @@ "step": 1200 }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "irate(node_disk_io_time_seconds_total{host=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])irate(node_disk_io_time_seconds_total{host=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])", "format": "time_series", "hide": true, @@ -1003,55 +1106,34 @@ "step": 20 }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "irate(node_disk_io_time_seconds_total{host=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{device}}", "refId": "C", "step": 30 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Disk: utilization", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": "", - "logBase": 1, - "max": "1", - "min": "0", - "show": true }, { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, + "editorMode": "code", + "expr": "sum(node_md_state{host=~\"$server\",state!=\"active\"}) by (device,state) > 0", + "format": "time_series", + "hide": false, + "interval": "", + "legendFormat": "{{device}}: {{state}}", + "range": true, + "refId": "E" } ], - "yaxis": { - "align": false, - "alignLevel": null - } + "title": "Disk: utilization", + "type": "timeseries" }, { "alerting": {}, @@ -1059,13 +1141,12 @@ "bars": true, "dashLength": 10, "dashes": false, - "datasource": "localhost", + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "editable": true, "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "grid": {}, @@ -1094,7 +1175,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "9.3.2", "pointradius": 1, "points": false, "renderer": "flot", @@ -1117,6 +1198,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "exemplar": true, "expr": "node_disk_io_now{host=~\"$server.*\"}", "format": "time_series", @@ -1129,6 +1214,10 @@ "step": 60 }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "irate(node_disk_io_time_seconds_total{host=~\"$server\",device=~\"([vs]d[a-z]+|nvme.+)\"}[$__rate_interval])", "format": "time_series", "hide": true, @@ -1139,6 +1228,10 @@ "step": 1200 }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_md_is_active{host=~\"$server\"} < 1", "format": "time_series", "hide": false, @@ -1150,9 +1243,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Disk: saturation", "tooltip": { "msResolution": false, @@ -1162,36 +1253,27 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:92", - "decimals": null, "format": "short", - "label": null, "logBase": 1, - "max": null, "min": "0", "show": true }, { "$$hashKey": "object:93", "format": "ms", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1199,10 +1281,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "localhost", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" }, "fill": 1, "fillGradient": 0, @@ -1231,7 +1312,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "9.3.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -1241,6 +1322,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_sockstat_TCP_tw{host=~\"$server\"}", "format": "time_series", "intervalFactor": 2, @@ -1250,6 +1335,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_sockstat_UDP_inuse{host=~\"$server\"}", "format": "time_series", "intervalFactor": 2, @@ -1259,6 +1348,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_sockstat_TCP_inuse{host=~\"$server\"}", "format": "time_series", "intervalFactor": 2, @@ -1268,6 +1361,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_nf_conntrack_entries{host=~\"$server.*\"} / node_nf_conntrack_entries_limit{host=~\"$server.*\"}", "format": "time_series", "hide": true, @@ -1278,9 +1375,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Socket: utilization", "tooltip": { "shared": true, @@ -1289,33 +1384,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1323,10 +1409,9 @@ "bars": true, "dashLength": 10, "dashes": false, - "datasource": "localhost", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" }, "fill": 1, "fillGradient": 0, @@ -1355,7 +1440,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.7", + "pluginVersion": "9.3.2", "pointradius": 5, "points": false, "renderer": "flot", @@ -1365,6 +1450,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_netstat_Tcp_InErrs{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, @@ -1374,6 +1463,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_netstat_Tcp_AttemptFails{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, @@ -1383,6 +1476,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_netstat_Tcp_EstabResets{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, @@ -1393,6 +1490,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_netstat_Udp_RcvbufErrors{host=~\"$server.*\"}[$__rate_interval]) + rate(node_netstat_Udp_SndbufErrors{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, @@ -1403,6 +1504,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_netstat_Udp_InErrors{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 2, @@ -1412,6 +1517,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_netstat_TcpExt_RcvPruned{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, @@ -1422,6 +1531,10 @@ "step": 30 }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_netstat_TcpExt_SyncookiesFailed{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, @@ -1432,6 +1545,10 @@ "step": 30 }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_netstat_TcpExt_ListenDrops{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, @@ -1442,6 +1559,10 @@ "step": 30 }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_netstat_Icmp_InErrors{host=~\"$server.*\"}[$__rate_interval]) + rate(node_netstat_Icmp_OutErrors{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": false, @@ -1452,6 +1573,10 @@ "step": 30 }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_netstat_Tcp_OutRsts{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, @@ -1464,9 +1589,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Socket: errors", "tooltip": { "shared": true, @@ -1475,9 +1598,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -1486,22 +1607,17 @@ "format": "hertz", "label": "", "logBase": 1, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1509,10 +1625,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "localhost", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" }, "fill": 1, "fillGradient": 0, @@ -1551,6 +1666,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_filefd_allocated{host=~\"$server.*\"} / node_filefd_maximum{host=~\"$server.*\"}", "format": "time_series", "hide": false, @@ -1562,6 +1681,10 @@ "target": "isNonNull()" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_procs_running{host=~\"$server.*\"}", "format": "time_series", "hide": false, @@ -1573,6 +1696,10 @@ "target": "isNonNull()" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_nf_conntrack_entries{host=~\"$server.*\"} / node_nf_conntrack_entries_limit{host=~\"$server.*\"}", "format": "time_series", "hide": false, @@ -1584,6 +1711,10 @@ "target": "isNonNull()" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_", "format": "time_series", "hide": false, @@ -1596,9 +1727,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "misc: utilization", "tooltip": { "shared": true, @@ -1607,34 +1736,26 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { - "decimals": null, "format": "short", "label": "", "logBase": 1024, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1642,10 +1763,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "localhost", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" }, "fill": 1, "fillGradient": 0, @@ -1684,6 +1804,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_procs_blocked{host=~\"$server.*\"}", "format": "time_series", "hide": false, @@ -1695,6 +1819,10 @@ "target": "isNonNull()" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_entropy_available_bits{host=~\"$server.*\"}", "format": "time_series", "intervalFactor": 2, @@ -1705,6 +1833,10 @@ "target": "isNonNull()" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_forks_total{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, @@ -1716,6 +1848,10 @@ "target": "isNonNull()" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_intr_total{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, @@ -1728,9 +1864,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "misc: saturation", "tooltip": { "shared": true, @@ -1739,33 +1873,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1024, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1773,10 +1899,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "localhost", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" }, "fill": 1, "fillGradient": 0, @@ -1815,6 +1940,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_edac_uncorrectable_errors_total{host=\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": false, @@ -1826,6 +1955,10 @@ "target": "isNonNull()" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_textfile_scrape_error{host=\"$server.*\"}", "format": "time_series", "hide": false, @@ -1837,6 +1970,10 @@ "target": "isNonNull()" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_intr_total{host=~\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": true, @@ -1848,6 +1985,10 @@ "target": "isNonNull()" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "rate(node_edac_correctable_errors_total{host=\"$server.*\"}[$__rate_interval])", "format": "time_series", "hide": false, @@ -1860,9 +2001,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "misc: errors", "tooltip": { "shared": true, @@ -1871,33 +2010,25 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1024, - "max": null, "min": "0", "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1906,13 +2037,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "localhost", + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "editable": true, "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 0, "fillGradient": 0, "grid": {}, @@ -1952,6 +2082,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "1- (node_filesystem_avail_bytes{host=~\"$server\",fstype!~\"(tmpfs|rpc_pipefs|debugfs)\"} / node_filesystem_size_bytes{host=~\"$server\"})", "format": "time_series", "hide": false, @@ -1962,6 +2096,10 @@ "target": "" }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "1- (node_filesystem_files_free{host=~\"$server\",fstype!~\"(tmpfs|rpc_pipefs|debugfs)\"} / node_filesystem_files{host=~\"$server\"})", "format": "time_series", "hide": false, @@ -1971,6 +2109,10 @@ "step": 60 }, { + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" + }, "expr": "node_filesystem_size_bytes", "format": "time_series", "hide": true, @@ -1982,9 +2124,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "filesystem: utilization", "tooltip": { "msResolution": false, @@ -1994,38 +2134,31 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", - "label": null, "logBase": 1, "max": 1, - "min": null, "show": true }, { "format": "percentunit", - "label": null, "logBase": 1, "max": 1, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], "refresh": "5m", - "schemaVersion": 27, + "schemaVersion": 37, "style": "dark", "tags": [ "prometheus", @@ -2034,19 +2167,18 @@ "templating": { "list": [ { - "allValue": null, "current": { "selected": true, - "text": "indolenza", - "value": "indolenza" + "text": "assenza", + "value": "assenza" + }, + "datasource": { + "type": "prometheus", + "uid": "P49960DE5880E8C68" }, - "datasource": "localhost", "definition": "", - "description": null, - "error": null, "hide": 0, "includeAll": false, - "label": null, "multi": false, "name": "server", "options": [], @@ -2058,9 +2190,6 @@ "regex": "", "skipUrlSync": false, "sort": 1, - "tagValuesQuery": null, - "tags": [], - "tagsQuery": null, "type": "query", "useTags": false } @@ -2098,5 +2227,6 @@ "timezone": "utc", "title": "Host overview", "uid": "W8eE_Qgik", - "version": 12 -} + "version": 13, + "weekStart": "" +}
\ No newline at end of file diff --git a/float/roles/float-infra-prometheus/templates/prometheus.yml.j2 b/float/roles/float-infra-prometheus/templates/prometheus.yml.j2 index 0a6caa3..f0f23d2 100644 --- a/float/roles/float-infra-prometheus/templates/prometheus.yml.j2 +++ b/float/roles/float-infra-prometheus/templates/prometheus.yml.j2 @@ -44,6 +44,9 @@ {% macro job_service_config(service_name, target_config) %} - job_name: "{{ service_name }}_{{ target_config.port }}" scheme: "{{ target_config.get('scheme', 'https') }}" +{% if target_config.get('scrape_interval') %} + scrape_interval: "{{ target_config['scrape_interval'] }}" +{% endif %} {% if target_config.get('metrics_path') %} metrics_path: "{{ target_config['metrics_path'] }}" {% endif %} @@ -101,6 +104,7 @@ scrape_configs: {# Blackbox probes #} +{% set probe_scrape_interval = prometheus_probe_scrape_interval | default(prometheus_scrape_interval) %} {% for prober_host in services['prometheus'].hosts|sort %} {% set prober_idx = loop.index %} @@ -109,6 +113,7 @@ scrape_configs: {% for target_config in service.get('monitoring_endpoints', []) %} - job_name: "prober_health_{{ service_name | replace('-', '_') }}_{{ prober_idx }}_{{ loop.index }}" metrics_path: "/probe" + scrape_interval: "{{ probe_scrape_interval }}" params: module: - http_health_{{ target_config.healthcheck_http_method | default('HEAD') | lower }} @@ -135,6 +140,7 @@ scrape_configs: probe: health probeset: health prober_float_service: prometheus + prober_float_endpoint: prober float_service: "{{ service_name }}" float_job: "{{ service_name }}_{{ target_config.port }}" {% endfor %} @@ -142,6 +148,7 @@ scrape_configs: - job_name: "prober_ping_{{ loop.index }}" metrics_path: "/probe" + scrape_interval: "{{ probe_scrape_interval }}" params: module: - ping @@ -168,9 +175,11 @@ scrape_configs: probe: ping probeset: base prober_float_service: prometheus + prober_float_endpoint: prober - job_name: "prober_https_{{ prober_idx }}" metrics_path: "/probe" + scrape_interval: "{{ probe_scrape_interval }}" params: module: - http_base @@ -197,9 +206,11 @@ scrape_configs: probe: https probeset: base prober_float_service: prometheus + prober_float_endpoint: prober - job_name: "prober_dns_{{ prober_idx }}" metrics_path: "/probe" + scrape_interval: "{{ probe_scrape_interval }}" params: module: [dns_toplevel] relabel_configs: @@ -225,6 +236,7 @@ scrape_configs: probe: dns probeset: base prober_float_service: prometheus + prober_float_endpoint: prober {% endfor %} @@ -233,6 +245,7 @@ scrape_configs: {% for prober_host in services[p.service].hosts | sort %} - job_name: "prober_{{ p.name }}_{{ loop.index }}" metrics_path: "/probe" + scrape_interval: "{{ p.scrape_interval | default(probe_scrape_interval) }}" params: module: - {{ p.module | default(p.name) }} @@ -267,6 +280,7 @@ scrape_configs: probe: {{ p.name }} probeset: custom prober_float_service: {{ p.service }} + prober_float_endpoint: {{ float_http_endpoints_by_port[p.port] | default(p.service) }} {% if p.service is defined %} {% for k, v in services[p.service].prober_labels | default({}) | dictsort %} {{ k }}: {{ v }} @@ -280,6 +294,9 @@ scrape_configs: - job_name: "{{ target.name }}" scheme: "{{ target.scheme | default('http') }}" metrics_path: "{{ target.metrics_path | default('/metrics') }}" +{% if target.scrape_interval is defined %} + scrape_interval: "{{ target.scrape_interval }}" +{% endif %} static_configs: - targets: {{ target.targets | to_json }} labels: @@ -320,6 +337,10 @@ scrape_configs: - targets: {{ prometheus_federated_targets | to_json }} {% endif %} +{% if prometheus_extra_scrape_config is defined %} +{{ prometheus_extra_scrape_config }} +{% endif %} + rule_files: - /etc/prometheus/rules/*.yml diff --git a/float/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml b/float/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml index 7490704..eb04ce1 100644 --- a/float/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml +++ b/float/roles/float-infra-prometheus/templates/rules/alerts_base.conf.yml @@ -95,7 +95,7 @@ groups: Probe {{ $labels.probe }} ({{ $labels.host }}) is failing for target {{ $labels.host }} (success ratio {{ $value }}). - Failed probe logs: https://{{ $labels.prober_float_service }}.[[ domain_public[0] ]]/ + Failed probe logs: https://{{ $labels.prober_float_endpoint }}.[[ domain_public[0] ]]/ runbook: '[[ alert_runbook_fmt | format("ProbeFailure") ]]' - alert: ProbeFailure @@ -110,7 +110,7 @@ groups: Probe {{ $labels.probe }} ({{ $labels.zone }}) is failing globally (success ratio {{ $value }}). - Failed probe logs: https://{{ $labels.prober_float_service }}.[[ domain_public[0] ]]/ + Failed probe logs: https://{{ $labels.prober_float_endpoint }}.[[ domain_public[0] ]]/ runbook: '[[ alert_runbook_fmt | format("ProbeFailure") ]]' {# Specific ProbeFailure alerts for each custom prober_service->timeout pair #} @@ -128,7 +128,7 @@ groups: Probe {{ $labels.probe }} ({{ $labels.host }}) is failing for target {{ $labels.host }} (success ratio {{ $value }}). - Failed probe logs: https://{{ $labels.prober_float_service }}.[[ domain_public[0] ]]/ + Failed probe logs: https://{{ $labels.prober_float_endpoint }}.[[ domain_public[0] ]]/ runbook: '[[ alert_runbook_fmt | format("ProbeFailure") ]]' - alert: ProbeFailure @@ -143,7 +143,7 @@ groups: Probe {{ $labels.probe }} ({{ $labels.zone }}) is failing globally (success ratio {{ $value }}). - Failed probe logs: https://{{ $labels.prober_float_service }}.[[ domain_public[0] ]]/ + Failed probe logs: https://{{ $labels.prober_float_endpoint }}.[[ domain_public[0] ]]/ runbook: '[[ alert_runbook_fmt | format("ProbeFailure") ]]' {% endfor %} diff --git a/float/roles/float-infra-prometheus/templates/rules/alerts_disk.conf.yml b/float/roles/float-infra-prometheus/templates/rules/alerts_disk.conf.yml index f01ec0c..18a262f 100644 --- a/float/roles/float-infra-prometheus/templates/rules/alerts_disk.conf.yml +++ b/float/roles/float-infra-prometheus/templates/rules/alerts_disk.conf.yml @@ -3,7 +3,7 @@ groups: rules: - alert: DiskWillFillIn4Hours expr: (predict_linear(node_filesystem_avail_bytes[1h], 4 * 3600) < 0) and (node_filesystem_avail_bytes / node_filesystem_size_bytes < 0.6) - for: 30m + for: 1h labels: severity: page scope: host diff --git a/float/roles/float-infra-prometheus/templates/rules/alerts_nginx.conf.yml b/float/roles/float-infra-prometheus/templates/rules/alerts_nginx.conf.yml deleted file mode 100644 index 5c63354..0000000 --- a/float/roles/float-infra-prometheus/templates/rules/alerts_nginx.conf.yml +++ /dev/null @@ -1,28 +0,0 @@ -groups: -- name: roles/float-infra-prometheus/templates/rules/alerts_nginx.conf - rules: - - - alert: HTTPErrorRatioHigh - expr: (global:nginx_http_requests_errs:ratio > 0.2 and global:nginx_http_requests_total:rate5m > 0.1) - for: 5m - labels: - scope: global - service: nginx - severity: page - annotations: - summary: 'High HTTP error ratio for {{$labels.vhost}} globally' - description: 'We are serving lots of 5xx errors for {{$labels.vhost}} on all frontends.' - runbook: '[[ alert_runbook_fmt | format("HTTPErrorRatioHigh") ]]' - - - alert: HTTPErrorRatioHigh - expr: (host:nginx_http_requests_errs:ratio > 0.2 and host:nginx_http_requests_total:rate5m > 0.1) - for: 10m - labels: - scope: host - service: nginx - severity: page - annotations: - summary: 'High HTTP error ratio for {{$labels.vhost}} on {{$labels.host}}' - description: 'We are serving lots of 5xx errors for {{$labels.vhost}} on {{$labels.host}}.' - runbook: '[[ alert_runbook_fmt | format("HTTPErrorRatioHigh") ]]' - diff --git a/float/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml b/float/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml index c7df069..bb20fb7 100644 --- a/float/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml +++ b/float/roles/float-infra-prometheus/templates/rules/rules_base.conf.yml @@ -31,6 +31,10 @@ groups: - record: probe:probe_success:ratio expr: probe:probe_success:sum / probe:probe_success:count + # Separate SLI-oriented metric that looks at success across prober hosts. + - record: probe:probe_success:max + expr: max(probe_success) without (job,instance,prober_host,host) + # Special metric for the ping probe. # The 'bool' qualifier makes the greater-than operation not act as a filter. - record: host_reachable diff --git a/float/roles/float-infra-prometheus/templates/rules/rules_net.conf.yml b/float/roles/float-infra-prometheus/templates/rules/rules_net.conf.yml index 2041c9a..01e7767 100644 --- a/float/roles/float-infra-prometheus/templates/rules/rules_net.conf.yml +++ b/float/roles/float-infra-prometheus/templates/rules/rules_net.conf.yml @@ -29,6 +29,10 @@ groups: expr: sum(rate(node_network_transmit_bytes_total{device=~"(eth|e[nl][op]).*"}[5m])) without (device) - record: instance:public_network_receive_bytes_total:rate5m expr: sum(rate(node_network_receive_bytes_total{device=~"(eth|e[nl][op]).*"}[5m])) without (device) + - record: instance:internal_network_transmit_bytes_total:rate5m + expr: sum(rate(node_network_transmit_bytes_total{device=~"vpn.*"}[5m])) without (device) + - record: instance:internal_network_receive_bytes_total:rate5m + expr: sum(rate(node_network_receive_bytes_total{device=~"vpn.*"}[5m])) without (device) - record: global:public_network_transmit_bytes_total:rate5m expr: sum(instance:public_network_transmit_bytes_total:rate5m) without (instance, host) - record: global:public_network_receive_bytes_total:rate5m diff --git a/float/roles/float-infra-prometheus/templates/rules/rules_nginx.conf.yml b/float/roles/float-infra-prometheus/templates/rules/rules_nginx.conf.yml index 5eaee6b..60b59bb 100644 --- a/float/roles/float-infra-prometheus/templates/rules/rules_nginx.conf.yml +++ b/float/roles/float-infra-prometheus/templates/rules/rules_nginx.conf.yml @@ -9,12 +9,14 @@ groups: expr: (host:nginx_http_requests_errs:rate5m / host:nginx_http_requests_total:rate5m) - record: global:nginx_http_requests_total:rate5m expr: sum(rate(nginx_http_requests[5m])) by (vhost) + - record: global:nginx_http_requests_200:rate5m + expr: sum(rate(nginx_http_requests{code="200"}[5m])) by (vhost) - record: global:nginx_http_requests_errs:rate5m expr: sum(rate(nginx_http_requests{code=~"5.*"}[5m])) by (vhost) - record: global:nginx_http_requests_errs:ratio expr: (global:nginx_http_requests_errs:rate5m / global:nginx_http_requests_total:rate5m) - record: global:nginx_http_cached_requests:ratio - expr: clamp_max(sum(rate(nginx_http_requests_cache[5m])) by (vhost, cache_status) / ignoring (cache_status) group_left global:nginx_http_requests_total:rate5m, 1) + expr: clamp_max(sum(rate(nginx_http_requests_cache[5m])) by (vhost, cache_status) / ignoring (cache_status) group_left global:nginx_http_requests_200:rate5m, 1) - name: http_requests_ms_histogram rules: diff --git a/float/roles/float-infra-service-dashboard/handlers/main.yml b/float/roles/float-infra-service-dashboard/handlers/main.yml new file mode 100644 index 0000000..3865720 --- /dev/null +++ b/float/roles/float-infra-service-dashboard/handlers/main.yml @@ -0,0 +1,6 @@ +--- + +- listen: reload service-dashboard + systemd: + name: docker-service-dashboard-http.service + state: restarted diff --git a/float/roles/float-infra-admin-dashboard/tasks/main.yml b/float/roles/float-infra-service-dashboard/tasks/main.yml index 219108d..17724f9 100644 --- a/float/roles/float-infra-admin-dashboard/tasks/main.yml +++ b/float/roles/float-infra-service-dashboard/tasks/main.yml @@ -9,9 +9,10 @@ copy: dest: "/etc/float/{{ item.name }}" content: "{{ item.data }}" - group: docker-admin-dashboard + group: docker-service-dashboard mode: 0640 - notify: "reload admin-dashboard" + notify: "reload service-dashboard" + no_log: true with_items: - name: services.yml data: "{{ services | to_nice_yaml }}" diff --git a/float/roles/float-infra-sso-server/defaults/main.yml b/float/roles/float-infra-sso-server/defaults/main.yml index c0f325c..28c5317 100644 --- a/float/roles/float-infra-sso-server/defaults/main.yml +++ b/float/roles/float-infra-sso-server/defaults/main.yml @@ -41,3 +41,7 @@ sso_service_ttls: # List of allowed Origins for CORS (URLs without path component). # These are not regular expressions, but you can use wildcards (*). sso_allowed_cors_origins: [] + +# When are users asked to authenticate again? (seconds) +sso_auth_session_lifetime: 43200 + diff --git a/float/roles/float-infra-sso-server/handlers/main.yml b/float/roles/float-infra-sso-server/handlers/main.yml index 07ab764..8a883a8 100644 --- a/float/roles/float-infra-sso-server/handlers/main.yml +++ b/float/roles/float-infra-sso-server/handlers/main.yml @@ -5,6 +5,8 @@ - name: restart user-meta-server systemd: name=user-meta-server.service state=restarted + # Allow failure when testing backups, the unit can't start until later. + ignore_errors: "{{ testing | default(True) }}" - name: restart auth-server systemd: name=auth-server.service state=restarted diff --git a/float/roles/float-infra-sso-server/meta/main.yml b/float/roles/float-infra-sso-server/meta/main.yml index 1e99df1..00e1770 100644 --- a/float/roles/float-infra-sso-server/meta/main.yml +++ b/float/roles/float-infra-sso-server/meta/main.yml @@ -4,3 +4,5 @@ dependencies: - role: float-util-geoip-dataset vars: geoip_dataset: 'Country' + - role: float-util-tor-exits-dataset + diff --git a/float/roles/float-infra-sso-server/templates/server.yml.j2 b/float/roles/float-infra-sso-server/templates/server.yml.j2 index d6360c4..1d7be5e 100644 --- a/float/roles/float-infra-sso-server/templates/server.yml.j2 +++ b/float/roles/float-infra-sso-server/templates/server.yml.j2 @@ -28,7 +28,7 @@ allowed_services: allowed_cors_origins: {{ sso_allowed_cors_origins | to_json }} allowed_exchanges: {{ sso_allowed_exchanges | to_json }} service_ttls: {{ sso_service_ttls | to_json }} -auth_session_lifetime: 43200 +auth_session_lifetime: {{ sso_auth_session_lifetime }} session_auth_key: "{{ sso_session_auth_secret }}" session_enc_key: "{{ sso_session_enc_secret }}" csrf_secret: "{{ sso_csrf_secret }}" @@ -47,8 +47,14 @@ keystore_enable_groups: url_path_prefix: "{{ sso_server_url_path_prefix }}" account_recovery_url: "{{ sso_server_account_recovery_url | default('') }}" default_signed_in_redirect: "{{ sso_server_default_signed_in_redirect | default('') }}" +cookie_same_site_mode: "{{ sso_cookie_same_site_mode | default('strict') }}" device_manager: auth_key: "{{ sso_device_manager_auth_secret }}" + zone_maps: + - type: ipset + path: /var/lib/tor-exits/exit-nodes + value: Tor + - type: geoip http_server: enable_compression: true request_timeout: 10 @@ -72,3 +78,9 @@ site_name: {{ sso_site_title }} {% if sso_favicon is defined %} site_favicon: {{ sso_favicon }} {% endif %} +{% if sso_login_username_label is defined %} +login_username_label: {{ sso_login_username_label }} +{% endif %} +{% if sso_login_again_url is defined %} +login_again_url: "{{ sso_login_again_url }}" +{% endif %} diff --git a/float/roles/float-util-credentials/README.md b/float/roles/float-util-credentials/README.md index de7a8aa..88f1695 100644 --- a/float/roles/float-util-credentials/README.md +++ b/float/roles/float-util-credentials/README.md @@ -12,7 +12,7 @@ on the Ansible host. X509 credentials are stored in /etc/credentials/x509 under directories named after the services. Every service directory contains a copy of the public CA certificate, so it can be bind-mounted in a container -easily. +easily. There will be separate client and server certificates. Private keys have mode 440, are owned by root and by a dedicated group named *service*-credentials. When the service is actually installed, @@ -24,3 +24,25 @@ list of entries specifying the desired credentials. This is already done once system-wide by the *float-credentials* role with the credentials automagically derived from the service definitions by *float*. + +## Multiple PKIs + +The role supports credentials from different PKI CAs, each identified +by a separate *tag*, with *x509* being the tag of the default internal +float CA. + +Additional PKIs are expected to have their CA credentials in the +*credentials_dir*/*tag* local directory, and will have their +certificates installed below /etc/credentials/*tag*. + +There are two ways, when invoking this role, to specify that a +different CA from the default should be used: + +* By setting the *ca_tag* attribute in the *credentials* map of any of + the values passed in the *credentials* variable (yes that's + credentials nested twice). This is how float passes the + *service_credentials* metadata, so you can just set *ca_tag* there. +* By setting the *ca_tag* variable in Ansible when including this + role, if you are creating certificates manually rather than relying + on *service_credentials*. + diff --git a/float/roles/float-util-credentials/tasks/main.yml b/float/roles/float-util-credentials/tasks/main.yml index 907f0ff..b7cf1fe 100644 --- a/float/roles/float-util-credentials/tasks/main.yml +++ b/float/roles/float-util-credentials/tasks/main.yml @@ -16,8 +16,10 @@ changed_when: false register: all_systemd_units -# Get the credential names from the list of certs. - set_fact: + # Default CA name. + default_ca_tag: "{{ ca_tag | default('x509') }}" + # Get the credential names from the list of certs. credentials_names: "{{ credentials | map(attribute='credentials') | map(attribute='name') | unique | list }}" - name: "Create service credentials group" @@ -28,18 +30,18 @@ - name: "Create service credentials dirs" file: - path: "/etc/credentials/x509/{{ item }}" + path: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}" state: directory - loop: "{{ credentials_names }}" + loop: "{{ credentials }}" - name: Copy CA copy: - src: "{{ credentials_dir }}/x509/ca.pem" - dest: "/etc/credentials/x509/{{ item }}/ca.pem" + src: "{{ local_ca_path | default(credentials_dir + '/' + (item.credentials.ca_tag | default(default_ca_tag))) }}/ca.pem" + dest: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}/ca.pem" owner: root group: root mode: 0644 - loop: "{{ credentials_names }}" + loop: "{{ credentials }}" # Create and sign all certificates in a series of loops (with some # unfortunately complex change-detection logic). @@ -47,7 +49,7 @@ block: - file: - path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}" + path: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}/{{ item.mode }}" state: directory loop: "{{ credentials }}" @@ -57,9 +59,9 @@ domain: "{{ domain }}" mode: "{{ item.mode }}" params: "{{ item.x509_params|default({}) }}" - private_key_path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem" - cert_path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/cert.pem" - ca_cert_path: "/etc/credentials/x509/{{ item.credentials.name }}/ca.pem" + private_key_path: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem" + cert_path: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}/{{ item.mode }}/cert.pem" + ca_cert_path: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}/ca.pem" check: true loop: "{{ credentials }}" check_mode: no @@ -72,7 +74,7 @@ domain: "{{ domain }}" mode: "{{ item.0.mode }}" params: "{{ item.0.x509_params|default({}) }}" - private_key_path: "/etc/credentials/x509/{{ item.0.credentials.name }}/{{ item.0.mode }}/private_key.pem" + private_key_path: "/etc/credentials/{{ item.0.credentials.ca_tag | default(default_ca_tag) }}/{{ item.0.credentials.name }}/{{ item.0.mode }}/private_key.pem" check: false when: "item.1.changed" loop: "{{ credentials | zip(x509_should_update.results) | list }}" @@ -82,15 +84,15 @@ x509_sign: csr: "{{ item.1.csr }}" mode: "{{ item.0.mode }}" - ca_cert_path: "{{ credentials_dir }}/x509/ca.pem" - ca_key_path: "{{ credentials_dir }}/x509/ca_private_key.pem" + ca_cert_path: "{{ local_ca_path | default(credentials_dir + '/' + (item.0.credentials.ca_tag | default(default_ca_tag))) }}/ca.pem" + ca_key_path: "{{ local_ca_path | default(credentials_dir + '/' + (item.0.credentials.ca_tag | default(default_ca_tag))) }}/ca_private_key.pem" when: "item.1.changed" loop: "{{ credentials | zip(x509_csr.results) | list }}" register: x509_sign - name: "Install the signed internal PKI certificates" copy: - dest: "/etc/credentials/x509/{{ item.0.credentials.name }}/{{ item.0.mode }}/cert.pem" + dest: "/etc/credentials/{{ item.0.credentials.ca_tag | default(default_ca_tag) }}/{{ item.0.credentials.name }}/{{ item.0.mode }}/cert.pem" content: "{{ item.1.cert }}" mode: 0644 when: "item.1.changed" @@ -98,7 +100,7 @@ - name: "Set permissions on the private keys" file: - path: "/etc/credentials/x509/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem" + path: "/etc/credentials/{{ item.credentials.ca_tag | default(default_ca_tag) }}/{{ item.credentials.name }}/{{ item.mode }}/private_key.pem" group: "{{ item.credentials.name }}-credentials" mode: 0640 loop: "{{ credentials }}" @@ -112,4 +114,3 @@ rescue: - debug: msg: "Failed to set up one or more credentials" - diff --git a/float/roles/float-util-tor-exits-dataset/README.md b/float/roles/float-util-tor-exits-dataset/README.md new file mode 100644 index 0000000..401c58a --- /dev/null +++ b/float/roles/float-util-tor-exits-dataset/README.md @@ -0,0 +1,2 @@ +Role to install a cron job that periodically updates a list of Tor +exit node IPs in /var/lib/tor-exits/exit-nodes. diff --git a/float/roles/float-util-tor-exits-dataset/files/is-tor-exit b/float/roles/float-util-tor-exits-dataset/files/is-tor-exit new file mode 100644 index 0000000..2a3f470 --- /dev/null +++ b/float/roles/float-util-tor-exits-dataset/files/is-tor-exit @@ -0,0 +1,6 @@ +#!/bin/sh + +exit_nodes_file="/var/lib/tor-exits/exit-nodes" + +test -e ${exit_nodes_file} || exit 1 +exec grep -q "^$1\$" ${exit_nodes_file} diff --git a/float/roles/float-util-tor-exits-dataset/files/update-tor-exits b/float/roles/float-util-tor-exits-dataset/files/update-tor-exits new file mode 100644 index 0000000..b4132c3 --- /dev/null +++ b/float/roles/float-util-tor-exits-dataset/files/update-tor-exits @@ -0,0 +1,15 @@ +#!/bin/sh +# +# Update list of Tor exit nodes in /var/lib/tor-exits/exit-nodes. +# + +url="https://www.dan.me.uk/torlist/?exit" +output="/var/lib/tor-exits/exit-nodes" +tmpfile="/var/lib/tor-exits/.exit-nodes.tmp" + +trap "rm -f ${tmpfile} 2>/dev/null" EXIT + +curl --silent --fail --output "${tmpfile}" "${url}" && \ + mv -f "${tmpfile}" "${output}" + +exit $? diff --git a/float/roles/float-util-tor-exits-dataset/tasks/main.yml b/float/roles/float-util-tor-exits-dataset/tasks/main.yml new file mode 100644 index 0000000..40645f2 --- /dev/null +++ b/float/roles/float-util-tor-exits-dataset/tasks/main.yml @@ -0,0 +1,31 @@ +--- + +- name: Create tor-exits state directory + file: + path: "/var/lib/tor-exits" + state: directory + +- name: Install tor-exits scripts + copy: + src: "{{ item }}" + dest: "/usr/local/bin/{{ item }}" + mode: "0755" + loop: + - "update-tor-exits" + - "is-tor-exit" + +- name: Set up tor-exits update cron job + copy: + dest: "/etc/cron.d/update-tor-exits" + content: "55 */3 * * * root /usr/local/bin/splay 60 && /usr/local/bin/update-tor-exits >/dev/null\n" + +# Run the script right away on first install. It might fail on testing +# environments due to strict rate-limiting on the source URL. +- stat: + path: "/var/lib/tor-exits/exit-nodes" + register: tor_exits_dataset + +- name: Update list of Tor exit nodes + command: "/usr/local/bin/update-tor-exits" + when: "not tor_exits_dataset.stat.exists" + ignore_errors: true diff --git a/float/scripts/floatup.py b/float/scripts/floatup.py index 465bfff..5647d7d 100755 --- a/float/scripts/floatup.py +++ b/float/scripts/floatup.py @@ -87,7 +87,7 @@ def encode_dashboard_request(req): return base64.urlsafe_b64encode(comp.flush()).decode('ascii') -def install_ssh_key(): +def install_vagrant_ssh_key(): # Install the SSH key as Vagrant would do, for compatibility. key_path = os.path.join( os.getenv('HOME'), '.vagrant.d', 'insecure_private_key') @@ -134,6 +134,13 @@ def main(): '--dashboard-url', metavar='URL', help='vmine dashboard base URL (for Gitlab CI)') parser.add_argument( + '--ssh-key', metavar='FILE', + type=argparse.FileType('r'), + help='root SSH key to install on VMs') + parser.add_argument( + '--name', metavar='NAME', + help='group name (for named groups)') + parser.add_argument( 'cmd', choices=['up', 'down']) args = parser.parse_args() @@ -148,6 +155,12 @@ def main(): host_attrs['image'] = args.image req = parse_inventory(args.inventory, host_attrs) req['ttl'] = args.ttl + if args.name: + req['name'] = args.name + if args.ssh_key: + req['ssh_key'] = args.ssh_key + else: + install_vagrant_ssh_key() print(f'creating VM group with attrs {host_attrs} ...') print(f'vmine request: {req}') @@ -157,8 +170,6 @@ def main(): fd.write(group_id) print(f'created VM group {group_id}') - install_ssh_key() - if args.env: with open(args.env, 'w') as fd: fd.write(f'VMINE_ID={group_id}\n') @@ -168,16 +179,22 @@ def main(): fd.write(f'VMINE_GROUP_URL={base_url}/dash/{payload}\n') elif args.cmd == 'down': - try: - with open(args.state_file) as fd: - group_id = fd.read().strip() - except FileNotFoundError: - print('state file not found, exiting') - return - print(f'stopping VM group {group_id}...') - do_request(args.url + '/api/stop-group', args.ssh, - {'group_id': group_id}) - os.remove(args.state_file) + req = {} + if args.name: + req['name'] = args.name + print(f'stopping VM group {args.name}...') + else: + try: + with open(args.state_file) as fd: + group_id = fd.read().strip() + except FileNotFoundError: + print('state file not found, exiting') + return + req['group_id'] = group_id + print(f'stopping VM group {group_id}...') + do_request(args.url + '/api/stop-group', args.ssh, req) + if args.state_file: + os.remove(args.state_file) if __name__ == '__main__': diff --git a/float/services.core.yml b/float/services.core.yml new file mode 100644 index 0000000..2ae62b7 --- /dev/null +++ b/float/services.core.yml @@ -0,0 +1,325 @@ +--- + +frontend: + scheduling_group: frontend + service_credentials: + - name: nginx + enable_server: false + - name: ssoproxy + enable_server: false + - name: replds-acme + systemd_services: + - nginx.service + - haproxy.service + - sso-proxy.service + - replds@acme.service + ports: + - 5005 + volumes: + - name: cache + path: /var/cache/nginx + size: 20g + monitoring_endpoints: + - port: 8404 + scheme: http + +dns: + scheduling_group: frontend + systemd_services: + - bind9.service + monitoring_endpoints: + - name: bind + port: 9119 + scheme: http + +log-collector: + scheduling_group: backend + num_instances: 1 + service_credentials: + - name: log-collector + enable_client: false + monitoring_endpoints: + - port: 9105 + scheme: http + containers: + - name: rsyslog + image: registry.git.autistici.org/ai3/docker/rsyslog:master + ports: + - 6514 + - 9105 + volumes: + - /etc/rsyslog-collector.conf: /etc/rsyslog.conf + - /etc/rsyslog-collector-lognorm: /etc/rsyslog-collector-lognorm + - /var/spool/rsyslog-collector: /var/spool/rsyslog + - /var/log/remote: /var/log/remote + egress_policy: internal + ports: + - 6514 + +prometheus: + scheduling_group: backend + num_instances: 1 + service_credentials: + - { name: prometheus } + containers: + - name: prometheus + image: registry.git.autistici.org/ai3/docker/prometheus:master + port: 9090 + volumes: + - /etc/prometheus: /etc/prometheus + - /var/lib/prometheus/metrics2: /var/lib/prometheus/metrics2 + args: "--storage.tsdb.retention.time={{ prometheus_tsdb_retention | default('90d') }} --web.external-url=https://monitor.{{ domain_public[0] }} --web.enable-lifecycle --query.max-samples={{ prometheus_max_samples | default('5000000') }}" + - name: alertmanager + image: registry.git.autistici.org/ai3/docker/prometheus-alertmanager:master + ports: + - 9093 + - 9094 + volumes: + - /etc/prometheus: /etc/prometheus + - /var/lib/prometheus/alertmanager: /var/lib/prometheus/alertmanager + args: "--web.external-url=https://alertmanager.{{ domain_public[0] }} --cluster.listen-address=:9094 --cluster.advertise-address={{ float_host_dns_map.get(inventory_hostname + '.prometheus', ['']) | list | first }}:9094{% for h in groups['prometheus']|sort if h != inventory_hostname %} --cluster.peer={{ h }}.prometheus.{{ domain }}:9094{% endfor %}" + - name: blackbox + image: registry.git.autistici.org/ai3/docker/prometheus-blackbox:master + ports: + - 9115 + volumes: + - /etc/prometheus: /etc/prometheus + args: "--config.file /etc/prometheus/blackbox.yml" + docker_options: "--cap-add=NET_RAW" + drop_capabilities: false + - name: grafana + image: registry.git.autistici.org/ai3/docker/grafana:master + port: 2929 + volumes: + - /etc/grafana: /etc/grafana + - /var/lib/grafana: /var/lib/grafana + egress_policy: internal + - name: thanos + image: registry.git.autistici.org/ai3/docker/thanos:master + ports: + - 10901 # sidecar grpc + - 10902 # sidecar http + - 10903 # query grpc + - 10904 # query http + - 10905 # query-frontend grpc + - 10906 # query-frontend http + resources: + ram: "1G" + env: + QUERY_FLAGS: "--query.replica-label=monitor {% for h in groups['prometheus']|sort %} --store={{ h }}.prometheus.{{ domain }}:10901{% endfor %}" + SIDECAR_FLAGS: "" + QUERY_FRONTEND_FLAGS: "--query-range.response-cache-config-file=/etc/thanos/query-frontend-cache.yml" + volumes: + - /etc/thanos: /etc/thanos + egress_policy: internal + - name: karma + image: registry.git.autistici.org/ai3/docker/karma:master + ports: + - 9193 + env: + # https://github.com/prymitive/karma/blob/master/docs/CONFIGURATION.md#environment-variables + CONFIG_FILE: "/etc/karma/float.yml" + PORT: 9193 + volumes: + - /etc/karma: /etc/karma + egress_policy: internal + public_endpoints: + - name: monitor + port: 9090 + scheme: http + enable_sso_proxy: true + - name: prober + port: 9115 + scheme: http + enable_sso_proxy: true + - name: grafana + port: 2929 + scheme: https + enable_sso_proxy: true + - name: thanos + port: 10906 + scheme: http + enable_sso_proxy: true + - name: alerts + port: 9193 + scheme: http + enable_sso_proxy: true + monitoring_endpoints: + - port: 9090 + scheme: http + healthcheck_http_method: OPTIONS + - port: 9093 + scheme: http + healthcheck_http_method: OPTIONS + - port: 9193 + scheme: http + healthcheck_http_method: GET + - port: 2929 + scheme: https + - port: 10904 + scheme: http + - port: 10902 + scheme: http + - port: 10906 + scheme: http + ports: + - 9094 + - 10901 + volumes: + - name: metrics + path: /var/lib/prometheus + owner: docker-prometheus + group: docker-prometheus + mode: "0755" + annotations: + dependencies: + - client: prometheus + server: alertmanager + - client: karma + server: alertmanager + - client: thanos + server: prometheus + +sso-server: + num_instances: 1 + scheduling_group: backend + service_credentials: + - name: sso-server + enable_server: false + public_endpoints: + - name: login + port: 5002 + scheme: http + monitoring_endpoints: + - port: 5002 + scheme: http + systemd_services: + - sso-server.service + annotations: + dependencies: + - client: sso-server + server: user-meta-server/user-meta-server + +auth-cache: + scheduling_group: backend + containers: + - name: memcache + image: registry.git.autistici.org/ai3/docker/memcached:master + ports: + - 11212 + - 11213 + env: + PORT: "11212" + egress_policy: internal + ports: + - 11212 + monitoring_endpoints: + - port: 11213 + scheme: http + +user-meta-server: + num_instances: 1 + scheduling_group: backend + service_credentials: + - name: user-meta-server + monitoring_endpoints: + - port: 5505 + scheme: https + ports: + - 5505 + systemd_services: + - user-meta-server.service + datasets: + - name: db + type: litestream + path: /var/lib/user-meta-server + filename: usermeta.db + owner: user-meta-server + litestream_params: + sync-interval: "60s" + +service-dashboard: + scheduling_group: frontend + service_credentials: + - name: service-dashboard + containers: + - name: http + image: registry.git.autistici.org/ai3/tools/float-dashboard:master + port: 8011 + volumes: + - /etc/float: /etc/float + env: + ADDR: ":8011" + DOMAIN: "{{ domain_public[0] }}" + egress_policy: internal + public_endpoints: + - name: service-dashboard + port: 8011 + scheme: http + enable_sso_proxy: true + +backup-metadata: + num_instances: 1 + scheduling_group: backend + service_credentials: + - name: backup-metadata + enable_client: false + monitoring_endpoints: + - port: 5332 + scheme: https + public_endpoints: + - name: backups + port: 5332 + scheme: https + enable_sso_proxy: true + ports: + - 5332 + systemd_services: + - tabacco-metadb.service + datasets: + - name: db + type: litestream + path: /var/lib/tabacco-metadb + filename: meta.db + owner: backup-metadata + +acme: + num_instances: 1 + scheduling_group: frontend + service_credentials: + - name: acme + enable_server: false + monitoring_endpoints: + - port: 5004 + scheme: http + ports: + - 5004 + systemd_services: + - acmeserver.service + +assets: + num_instances: 1 + scheduling_group: backend + service_credentials: + - name: assetmon + containers: + - name: http + image: registry.git.autistici.org/ai3/tools/assetmon:master + volumes: + - /etc/assetmon/server.yml: /etc/assetmon/server.yml + - /var/lib/assetmon: /var/lib/assetmon + ports: + - 3798 + egress_policy: internal + monitoring_endpoints: + - port: 3798 + scheme: https + public_endpoints: + - name: assets + port: 3798 + scheme: https + enable_sso_proxy: true + datasets: + - name: db + path: /var/lib/assetmon + owner: docker-assets diff --git a/float/services.default.yml b/float/services.default.yml new file mode 100644 index 0000000..7d07664 --- /dev/null +++ b/float/services.default.yml @@ -0,0 +1,105 @@ +--- + +include: + - "services.core.yml" + +reports-collector: + scheduling_group: frontend + containers: + - name: http + image: registry.git.autistici.org/ai3/tools/reports-collector:master + ports: + - 3995 + - 3996 + env: + ADDR: ":3995" + SMTP_ADDR: ":3996" + volumes: + - /var/lib/GeoIP: /var/lib/GeoIP + public_endpoints: + - name: live-reports + port: 3995 + scheme: http + monitoring_endpoints: + - port: 3995 + scheme: http + ports: + - 3996 + +log-collector: + scheduling_group: backend + num_instances: 1 + service_credentials: + - name: log-collector + enable_client: false + monitoring_endpoints: + - port: 9105 + scheme: http + - port: 9201 + scheme: http + public_endpoints: + - name: logs + port: 5601 + scheme: http + enable_sso_proxy: true + containers: + - name: rsyslog + image: registry.git.autistici.org/ai3/docker/rsyslog:master + ports: + - 6514 + - 9105 + volumes: + - /etc/rsyslog-collector.conf: /etc/rsyslog.conf + - /etc/rsyslog-collector: /etc/rsyslog-collector + - /etc/rsyslog-collector-lognorm: /etc/rsyslog-collector-lognorm + - /var/spool/rsyslog-collector: /var/spool/rsyslog + - /var/log/remote: /var/log/remote + egress_policy: internal + - name: kibana + image: registry.git.autistici.org/ai3/docker/kibana:master + port: 5601 + volumes: + - /etc/kibana: /etc/kibana + - /var/lib/kibana: /var/lib/kibana + env: + BABEL_CACHE_PATH: "/var/lib/kibana/.babelcache.json" + - name: elasticsearch + image: registry.git.autistici.org/ai3/docker/elasticsearch:master + port: 9200 + volumes: + - /etc/elasticsearch: /etc/elasticsearch + - /var/lib/elasticsearch: /var/lib/elasticsearch + - /var/log/elasticsearch: /var/log/elasticsearch + env: + PORT: 9200 + EXPORTER_PORT: 9201 + ports: + - 6514 + - 9200 + volumes: + - name: elasticsearch + path: /var/lib/elasticsearch + size: 100g + owner: docker-log-collector + group: docker-log-collector + mode: "0700" + annotations: + dependencies: + - client: kibana + server: elasticsearch + - client: log-collector-e2e/prober + server: elasticsearch + +log-collector-e2e: + scheduling_group: all + containers: + - name: prober + image: registry.git.autistici.org/ai3/tools/dye-injector:master + port: 7094 + env: + ADDR: ":7094" + monitoring_endpoints: + - name: log-collector-e2e-prober + port: 7094 + scheme: http + diff --git a/float/services.yml.default b/float/services.yml.default index c7c67a6..77b8ad4 100644..120000 --- a/float/services.yml.default +++ b/float/services.yml.default @@ -1,105 +1 @@ ---- - -include: - - "services.yml.no-elasticsearch" - -reports-collector: - scheduling_group: frontend - containers: - - name: http - image: registry.git.autistici.org/ai3/tools/reports-collector:master - ports: - - 3995 - - 3996 - env: - ADDR: ":3995" - SMTP_ADDR: ":3996" - volumes: - - /var/lib/GeoIP: /var/lib/GeoIP - public_endpoints: - - name: live-reports - port: 3995 - scheme: http - monitoring_endpoints: - - port: 3995 - scheme: http - ports: - - 3996 - -log-collector: - scheduling_group: backend - num_instances: 1 - service_credentials: - - name: log-collector - enable_client: false - monitoring_endpoints: - - port: 9105 - scheme: http - - port: 9201 - scheme: http - public_endpoints: - - name: logs - port: 5601 - scheme: http - enable_sso_proxy: true - containers: - - name: rsyslog - image: registry.git.autistici.org/ai3/docker/rsyslog:master - ports: - - 6514 - - 9105 - volumes: - - /etc/rsyslog-collector.conf: /etc/rsyslog.conf - - /etc/rsyslog-collector: /etc/rsyslog-collector - - /etc/rsyslog-collector-lognorm: /etc/rsyslog-collector-lognorm - - /var/spool/rsyslog-collector: /var/spool/rsyslog - - /var/log/remote: /var/log/remote - egress_policy: internal - - name: kibana - image: registry.git.autistici.org/ai3/docker/kibana:master - port: 5601 - volumes: - - /etc/kibana: /etc/kibana - - /var/lib/kibana: /var/lib/kibana - env: - BABEL_CACHE_PATH: "/var/lib/kibana/.babelcache.json" - - name: elasticsearch - image: registry.git.autistici.org/ai3/docker/elasticsearch:master - port: 9200 - volumes: - - /etc/elasticsearch: /etc/elasticsearch - - /var/lib/elasticsearch: /var/lib/elasticsearch - - /var/log/elasticsearch: /var/log/elasticsearch - env: - PORT: 9200 - EXPORTER_PORT: 9201 - ports: - - 6514 - - 9200 - volumes: - - name: elasticsearch - path: /var/lib/elasticsearch - size: 100g - owner: docker-log-collector - group: docker-log-collector - mode: "0700" - annotations: - dependencies: - - client: kibana - server: elasticsearch - - client: log-collector-e2e/prober - server: elasticsearch - -log-collector-e2e: - scheduling_group: all - containers: - - name: prober - image: registry.git.autistici.org/ai3/tools/dye-injector:master - port: 7094 - env: - ADDR: ":7094" - monitoring_endpoints: - - name: log-collector-e2e-prober - port: 7094 - scheme: http - +services.default.yml
\ No newline at end of file diff --git a/float/services.yml.no-elasticsearch b/float/services.yml.no-elasticsearch index 3665352..3685dcf 100644..120000 --- a/float/services.yml.no-elasticsearch +++ b/float/services.yml.no-elasticsearch @@ -1,318 +1 @@ ---- - -frontend: - scheduling_group: frontend - service_credentials: - - name: nginx - enable_server: false - - name: ssoproxy - enable_server: false - - name: replds-acme - systemd_services: - - nginx.service - - haproxy.service - - sso-proxy.service - - replds@acme.service - ports: - - 5005 - volumes: - - name: cache - path: /var/cache/nginx - size: 20g - monitoring_endpoints: - - port: 8404 - scheme: http - -dns: - scheduling_group: frontend - systemd_services: - - bind9.service - monitoring_endpoints: - - name: bind - port: 9119 - scheme: http - -log-collector: - scheduling_group: backend - num_instances: 1 - service_credentials: - - name: log-collector - enable_client: false - monitoring_endpoints: - - port: 9105 - scheme: http - containers: - - name: rsyslog - image: registry.git.autistici.org/ai3/docker/rsyslog:master - ports: - - 6514 - - 9105 - volumes: - - /etc/rsyslog-collector.conf: /etc/rsyslog.conf - - /etc/rsyslog-collector-lognorm: /etc/rsyslog-collector-lognorm - - /var/spool/rsyslog-collector: /var/spool/rsyslog - - /var/log/remote: /var/log/remote - egress_policy: internal - ports: - - 6514 - -prometheus: - scheduling_group: backend - num_instances: 1 - service_credentials: - - { name: prometheus } - containers: - - name: prometheus - image: registry.git.autistici.org/ai3/docker/prometheus:master - port: 9090 - volumes: - - /etc/prometheus: /etc/prometheus - - /var/lib/prometheus/metrics2: /var/lib/prometheus/metrics2 - args: "--storage.tsdb.retention.time={{ prometheus_tsdb_retention | default('90d') }} --web.external-url=https://monitor.{{ domain_public[0] }} --web.enable-lifecycle --query.max-samples={{ prometheus_max_samples | default('5000000') }}" - - name: alertmanager - image: registry.git.autistici.org/ai3/docker/prometheus-alertmanager:master - ports: - - 9093 - - 9094 - volumes: - - /etc/prometheus: /etc/prometheus - - /var/lib/prometheus/alertmanager: /var/lib/prometheus/alertmanager - args: "--web.external-url=https://alertmanager.{{ domain_public[0] }} --cluster.listen-address=:9094 --cluster.advertise-address={{ float_host_dns_map.get(inventory_hostname + '.prometheus', ['']) | list | first }}:9094{% for h in groups['prometheus']|sort if h != inventory_hostname %} --cluster.peer={{ h }}.prometheus.{{ domain }}:9094{% endfor %}" - - name: blackbox - image: registry.git.autistici.org/ai3/docker/prometheus-blackbox:master - ports: - - 9115 - volumes: - - /etc/prometheus: /etc/prometheus - args: "--config.file /etc/prometheus/blackbox.yml" - docker_options: "--cap-add=NET_RAW" - drop_capabilities: false - - name: grafana - image: registry.git.autistici.org/ai3/docker/grafana:master - port: 2929 - volumes: - - /etc/grafana: /etc/grafana - - /var/lib/grafana: /var/lib/grafana - egress_policy: internal - - name: thanos - image: registry.git.autistici.org/ai3/docker/thanos:master - ports: - - 10901 # sidecar grpc - - 10902 # sidecar http - - 10903 # query grpc - - 10904 # query http - - 10905 # query-frontend grpc - - 10906 # query-frontend http - resources: - ram: "1G" - env: - QUERY_FLAGS: "--query.replica-label=monitor {% for h in groups['prometheus']|sort %} --store={{ h }}.prometheus.{{ domain }}:10901{% endfor %}" - SIDECAR_FLAGS: "" - QUERY_FRONTEND_FLAGS: "--query-range.response-cache-config-file=/etc/thanos/query-frontend-cache.yml" - volumes: - - /etc/thanos: /etc/thanos - egress_policy: internal - - name: karma - image: registry.git.autistici.org/ai3/docker/karma:master - ports: - - 9193 - env: - # https://github.com/prymitive/karma/blob/master/docs/CONFIGURATION.md#environment-variables - CONFIG_FILE: "/etc/karma/float.yml" - PORT: 9193 - volumes: - - /etc/karma: /etc/karma - egress_policy: internal - public_endpoints: - - name: monitor - port: 9090 - scheme: http - enable_sso_proxy: true - - name: prober - port: 9115 - scheme: http - enable_sso_proxy: true - - name: grafana - port: 2929 - scheme: https - enable_sso_proxy: true - - name: thanos - port: 10906 - scheme: http - enable_sso_proxy: true - - name: alerts - port: 9193 - scheme: http - enable_sso_proxy: true - monitoring_endpoints: - - port: 9090 - scheme: http - healthcheck_http_method: OPTIONS - - port: 9093 - scheme: http - healthcheck_http_method: OPTIONS - - port: 9193 - scheme: http - healthcheck_http_method: GET - - port: 2929 - scheme: https - - port: 10904 - scheme: http - - port: 10902 - scheme: http - - port: 10906 - scheme: http - ports: - - 9094 - - 10901 - volumes: - - name: metrics - path: /var/lib/prometheus - owner: docker-prometheus - group: docker-prometheus - mode: "0755" - annotations: - dependencies: - - client: prometheus - server: alertmanager - - client: karma - server: alertmanager - - client: thanos - server: prometheus - -sso-server: - num_instances: 1 - scheduling_group: backend - service_credentials: - - name: sso-server - enable_server: false - public_endpoints: - - name: login - port: 5002 - scheme: http - monitoring_endpoints: - - port: 5002 - scheme: http - systemd_services: - - sso-server.service - annotations: - dependencies: - - client: sso-server - server: user-meta-server/user-meta-server - -auth-cache: - scheduling_group: backend - containers: - - name: memcache - image: registry.git.autistici.org/ai3/docker/memcached:master - port: 11212 - env: - PORT: "11212" - egress_policy: internal - ports: - - 11212 - -user-meta-server: - num_instances: 1 - scheduling_group: backend - service_credentials: - - name: user-meta-server - monitoring_endpoints: - - port: 5505 - scheme: https - ports: - - 5505 - systemd_services: - - user-meta-server.service - datasets: - - name: db - type: litestream - path: /var/lib/user-meta-server - filename: usermeta.db - owner: user-meta-server - -admin-dashboard: - scheduling_group: frontend - service_credentials: - - name: admin-dashboard - containers: - - name: http - image: registry.git.autistici.org/ai3/tools/float-dashboard:master - port: 8011 - volumes: - - /etc/float: /etc/float - env: - ADDR: ":8011" - DOMAIN: "{{ domain_public[0] }}" - egress_policy: internal - public_endpoints: - - name: admin - port: 8011 - scheme: http - enable_sso_proxy: true - -backup-metadata: - num_instances: 1 - scheduling_group: backend - service_credentials: - - name: backup-metadata - enable_client: false - monitoring_endpoints: - - port: 5332 - scheme: https - public_endpoints: - - name: backups - port: 5332 - scheme: https - enable_sso_proxy: true - ports: - - 5332 - systemd_services: - - tabacco-metadb.service - datasets: - - name: db - type: litestream - path: /var/lib/tabacco-metadb - filename: meta.db - owner: backup-metadata - -acme: - num_instances: 1 - scheduling_group: frontend - service_credentials: - - name: acme - enable_server: false - monitoring_endpoints: - - port: 5004 - scheme: http - ports: - - 5004 - systemd_services: - - acmeserver.service - -assets: - num_instances: 1 - scheduling_group: backend - service_credentials: - - name: assetmon - containers: - - name: http - image: registry.git.autistici.org/ai3/tools/assetmon:master - volumes: - - /etc/assetmon/server.yml: /etc/assetmon/server.yml - - /var/lib/assetmon: /var/lib/assetmon - ports: - - 3798 - egress_policy: internal - monitoring_endpoints: - - port: 3798 - scheme: https - public_endpoints: - - name: assets - port: 3798 - scheme: https - enable_sso_proxy: true - datasets: - - name: db - path: /var/lib/assetmon - owner: docker-assets +services.core.yml
\ No newline at end of file diff --git a/float/test-driver b/float/test-driver index 12e943b..863c717 100755 --- a/float/test-driver +++ b/float/test-driver @@ -56,7 +56,7 @@ save_logs() { log "Saving logs from VMs (if any)" mkdir -p "${out_dir}" ANSIBLE_STDOUT_CALLBACK=null \ - ${float_dir}/float run -e "callback_whitelist=" -e "out_dir=${out_dir}" \ + ${float_dir}/float run -e "callbacks_enabled=" -e "out_dir=${out_dir}" \ ${float_dir}/test/save-logs.yml } @@ -92,9 +92,12 @@ Commands: cleanup Cleanup the test environment (turn down VMs, etc) - run Run the main float playbook, and the test suite, using - the playbooks specified by the remaining command-line - arguments + provision Run the main float playbook + + test Run the test suite, using the playbooks specified by + the remaining command-line arguments + + run (deprecated) Run 'provision' and 'test' If DIR is specified, chdir there before running anything. @@ -119,6 +122,20 @@ if [ $# -gt 0 ]; then shift fi +do_provision() { + log Running main playbook + ${float_dir}/float run site.yml \ + || die "failed to run the main playbook" +} + +do_test() { + for playbook in "${float_dir}/test/integration-test.yml" "$@"; do + log Running test playbook ${playbook} + ${float_dir}/float run ${playbook} \ + || die "test playbook failed" + done +} + case "$cmd" in init) run_init @@ -134,16 +151,18 @@ case "$cmd" in run_cleanup ;; + provision) + do_provision + ;; + + test) + do_test + ;; + + # Legacy command to run both 'provision' and 'test' at once. run) - log Running main playbook - ${float_dir}/float run site.yml \ - || die "failed to run the main playbook" - - for playbook in "${float_dir}/test/integration-test.yml" "$@"; do - log Running test playbook ${playbook} - ${float_dir}/float run ${playbook} \ - || die "test playbook failed" - done + do_provision + do_test ;; *) diff --git a/float/test/README.md b/float/test/README.md index 82d8db7..86a856f 100644 --- a/float/test/README.md +++ b/float/test/README.md @@ -135,8 +135,8 @@ clear speed improvement. This is, in fact, already the default for test environments created with "float create-env" and it can be achieved by: -* importing "services.yml.no-elasticsearch" instead of - "services.yml.default" for the float built-in service definitions; +* importing "services.core.yml" instead of + "services.default.yml" for the float built-in service definitions; * setting the configuration variable "enable_elasticsearch" to false. The resulting environment will still run the *log-collector* service, diff --git a/float/test/backup.ref/services.yml b/float/test/backup.ref/services.yml index 8db1002..48d3bf3 100644 --- a/float/test/backup.ref/services.yml +++ b/float/test/backup.ref/services.yml @@ -1,7 +1,7 @@ --- include: - - "../../services.yml.no-elasticsearch" + - "../../services.core.yml" ok: scheduling_group: backend @@ -10,7 +10,7 @@ ok: image: registry.git.autistici.org/ai3/docker/okserver:latest port: 3100 env: - PORT: 3100 + ADDR: ":3100" resources: ram: 1g cpu: 0.5 diff --git a/float/test/backup.ref/site.yml b/float/test/backup.ref/site.yml index 601f945..7d5a194 100644 --- a/float/test/backup.ref/site.yml +++ b/float/test/backup.ref/site.yml @@ -7,4 +7,11 @@ - name: Create the test bucket run_once: true command: "podman run --env MC_HOST_local=http://minio:miniopassword@backup:9000 --network host --rm quay.io/minio/mc mb local/backuptest" + - systemd: + name: tabacco-metadb.service + state: restarted + - systemd: + name: user-meta-server.service + state: restarted + - local_action: shell sleep 60 diff --git a/float/test/base.ref/services.yml b/float/test/base.ref/services.yml index 0ed283e..ce0e7a8 100644 --- a/float/test/base.ref/services.yml +++ b/float/test/base.ref/services.yml @@ -32,7 +32,7 @@ ok: image: registry.git.autistici.org/ai3/docker/okserver:latest port: 3100 env: - PORT: 3100 + ADDR: ":3100" public_endpoints: - name: ok port: 3100 diff --git a/float/test/float_integration_test/http.py b/float/test/float_integration_test/http.py index 261aceb..c82ca7e 100644 --- a/float/test/float_integration_test/http.py +++ b/float/test/float_integration_test/http.py @@ -1,3 +1,4 @@ +import html import http.cookiejar import http.client import os @@ -40,10 +41,10 @@ class SSOHandler(urllib.request.BaseHandler): self._login_form_url = login_server.rstrip('/') + '/login' self._auth_notify_cb = auth_notify_cb - def _extract_hidden_form_data(self, html): + def _extract_hidden_form_data(self, body): form = {} - for name, value in self._form_pattern.findall(html): - form[name] = value + for name, value in self._form_pattern.findall(body): + form[name] = html.unescape(value) return form def https_response(self, req, resp): @@ -101,7 +102,8 @@ def _build_opener(ipaddr, follow_redirects=False, *extra_handlers): # Create a tolerant SSL context that accepts the self-signed # certificates used by the testing environment. ssl_context = ssl.create_default_context( - ssl.Purpose.CLIENT_AUTH) + purpose=ssl.Purpose.SERVER_AUTH) + ssl_context.check_hostname = False ssl_context.verify_mode = ssl.CERT_NONE debuglevel = 1 if os.getenv('HTTP_TRACE') else 0 diff --git a/float/test/float_integration_test/test_system.py b/float/test/float_integration_test/test_system.py index d07823c..7b08887 100644 --- a/float/test/float_integration_test/test_system.py +++ b/float/test/float_integration_test/test_system.py @@ -43,7 +43,7 @@ class TestBuiltinServiceURLs(URLTestBase): """Verify that all the public_endpoints are reachable. Tests will only run if the corresponding service (from - services.yml.default) is actually enabled. + services.yml) is actually enabled. """ @@ -57,8 +57,8 @@ class TestBuiltinServiceURLs(URLTestBase): def test_okserver(self): self.assert_endpoint_ok_if_enabled('ok', 'ok') - def test_admin_dashboard(self): - self.assert_endpoint_ok_if_enabled('admin-dashboard', 'admin', True) + def test_service_dashboard(self): + self.assert_endpoint_ok_if_enabled('service-dashboard', 'service-dashboard', True) def test_monitor(self): self.assert_endpoint_ok_if_enabled('prometheus', 'monitor', True) diff --git a/float/test/full.ref/services.yml b/float/test/full.ref/services.yml index 893079d..95fcd23 100644 --- a/float/test/full.ref/services.yml +++ b/float/test/full.ref/services.yml @@ -1,7 +1,7 @@ --- include: - - "../../services.yml.no-elasticsearch" + - "../../services.core.yml" ok: scheduling_group: backend @@ -10,7 +10,7 @@ ok: image: registry.git.autistici.org/ai3/docker/okserver:latest port: 3100 env: - PORT: 3100 + ADDR: ":3100" resources: ram: 1g cpu: 0.5 @@ -27,7 +27,7 @@ ok-root: root: true port: 799 env: - PORT: 799 + ADDR: ":799" resources: ram: 1g cpu: 0.5 |