From afd907c16d7ecee41ab15b0ce02c356dca9eba47 Mon Sep 17 00:00:00 2001
From: Ian Wienand <iwienand@redhat.com>
Date: Thu, 14 Feb 2019 08:10:51 +1100
Subject: [PATCH] letsencrypt support

This change contains the roles and testing for deploying certificates
on hosts using letsencrypt with domain authentication.

From a top level, the process is implemented in the roles as follows:

1) letsencrypt-acme-sh-install

   This role installs the acme.sh tool on hosts in the letsencrypt
   group, along with a small custom driver script to help parse output
   that is used by later roles.

2) letsencrypt-request-certs

   This role runs on each host, and reads a host variable describing
   the certificates required.  It uses the acme.sh tool (via the
   driver) to request the certificates from letsencrypt.  It populates
   a global Ansible variable with the authentication TXT records
   required.

   If the certificate exists on the host and is not within the renewal
   period, it should do nothing.

3) letsencrypt-install-txt-record

   This role runs on the adns server.  It installs the TXT records
   generated in step 2 to the acme.opendev.org domain and then
   refreshes the server.  Hosts wanting certificates will have
   pre-provisioned CNAME records for _acme-challenge.host.opendev.org
   pointing to acme.opendev.org.

4) letsencrypt-create-certs

   This role runs on each host, reading the same variable as in step
   2.  However this time the acme.sh tool is run to authenticate and
   create the certificates, which should now work correctly via the
   TXT records from step 3.  After this, the host will have the
   full certificate material.

Testing is added via testinfra.  For testing purposes requests are
made to the staging letsencrypt servers and a self-signed certificate
is provisioned in step 4 (as the authentication is not available
during CI).  We test that the DNS TXT records are created locally on
the CI adns server, however.

Related-Spec: https://review.openstack.org/587283

Change-Id: I1f66da614751a29cc565b37cdc9ff34d70fdfd3f
---
 .zuul.yaml                                    | 30 ++++++++
 inventory/groups.yaml                         |  2 +
 playbooks/base.yaml                           | 17 +++++
 .../letsencrypt-acme-sh-install/README.rst    |  9 +++
 .../files/driver.sh                           | 76 +++++++++++++++++++
 .../tasks/main.yaml                           | 23 ++++++
 .../roles/letsencrypt-create-certs/README.rst | 19 +++++
 .../defaults/main.yaml                        |  1 +
 .../letsencrypt-create-certs/tasks/acme.yaml  | 16 ++++
 .../letsencrypt-create-certs/tasks/main.yaml  | 13 ++++
 .../letsencrypt-install-txt-record/README.rst | 19 +++++
 .../tasks/main.yaml                           | 35 +++++++++
 .../templates/zone.db.j2                      | 17 +++++
 .../letsencrypt-request-certs/README.rst      | 53 +++++++++++++
 .../defaults/main.yaml                        |  1 +
 .../letsencrypt-request-certs/tasks/acme.yaml | 29 +++++++
 .../letsencrypt-request-certs/tasks/main.yaml | 25 ++++++
 playbooks/zuul/run-base.yaml                  |  3 +
 playbooks/zuul/templates/gate-groups.yaml.j2  |  4 +
 .../templates/group_vars/letsencrypt.yaml.j2  |  4 +
 .../letsencrypt01.opendev.org.yaml.j2         |  7 ++
 .../letsencrypt02.opendev.org.yaml.j2         |  4 +
 testinfra/test_letsencrypt.py                 | 60 +++++++++++++++
 23 files changed, 467 insertions(+)
 create mode 100644 playbooks/roles/letsencrypt-acme-sh-install/README.rst
 create mode 100644 playbooks/roles/letsencrypt-acme-sh-install/files/driver.sh
 create mode 100644 playbooks/roles/letsencrypt-acme-sh-install/tasks/main.yaml
 create mode 100644 playbooks/roles/letsencrypt-create-certs/README.rst
 create mode 100644 playbooks/roles/letsencrypt-create-certs/defaults/main.yaml
 create mode 100644 playbooks/roles/letsencrypt-create-certs/tasks/acme.yaml
 create mode 100644 playbooks/roles/letsencrypt-create-certs/tasks/main.yaml
 create mode 100644 playbooks/roles/letsencrypt-install-txt-record/README.rst
 create mode 100644 playbooks/roles/letsencrypt-install-txt-record/tasks/main.yaml
 create mode 100644 playbooks/roles/letsencrypt-install-txt-record/templates/zone.db.j2
 create mode 100644 playbooks/roles/letsencrypt-request-certs/README.rst
 create mode 100644 playbooks/roles/letsencrypt-request-certs/defaults/main.yaml
 create mode 100644 playbooks/roles/letsencrypt-request-certs/tasks/acme.yaml
 create mode 100644 playbooks/roles/letsencrypt-request-certs/tasks/main.yaml
 create mode 100644 playbooks/zuul/templates/group_vars/letsencrypt.yaml.j2
 create mode 100644 playbooks/zuul/templates/host_vars/letsencrypt01.opendev.org.yaml.j2
 create mode 100644 playbooks/zuul/templates/host_vars/letsencrypt02.opendev.org.yaml.j2
 create mode 100644 testinfra/test_letsencrypt.py

diff --git a/.zuul.yaml b/.zuul.yaml
index 5000ada1fe..0b6b6e0749 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -439,6 +439,34 @@
       - playbooks/group_vars/eavesdrop.yaml
       - testinfra/test_eavesdrop.py
 
+
+- job:
+    name: system-config-run-letsencrypt
+    parent: system-config-run
+    description: |
+      Run the playbook for letsencrypt key acquisition
+    nodeset:
+      nodes:
+        - name: bridge.openstack.org
+          label: ubuntu-bionic
+        - name: adns-letsencrypt.opendev.org
+          label: ubuntu-bionic
+        - name: letsencrypt01.opendev.org
+          label: ubuntu-bionic
+        - name: letsencrypt02.opendev.org
+          label: ubuntu-bionic
+    host-vars:
+      letsencrypt01.opendev.org:
+        host_copy_output:
+          '/var/log/acme.sh': logs
+      letsencrypt02.opendev.org:
+        host_copy_output:
+          '/var/log/acme.sh': logs
+    files:
+      - .zuul.yaml
+      - playbooks/group_vars/letsencrypt.yaml
+      - playbooks/roles/letsencrypt.*
+
 - job:
     name: system-config-run-nodepool
     parent: system-config-run
@@ -647,6 +675,7 @@
               - name: system-config-build-image-gitea
                 soft: true
         - system-config-run-zuul-preview
+        - system-config-run-letsencrypt
         - system-config-build-image-jinja-init
         - system-config-build-image-gitea-init
         - system-config-build-image-gitea
@@ -673,6 +702,7 @@
               - name: system-config-upload-image-gitea
                 soft: true
         - system-config-run-zuul-preview
+        - system-config-run-letsencrypt
         - system-config-upload-image-jinja-init
         - system-config-upload-image-gitea-init
         - system-config-upload-image-gitea
diff --git a/inventory/groups.yaml b/inventory/groups.yaml
index 43610aefc0..394dd653ff 100644
--- a/inventory/groups.yaml
+++ b/inventory/groups.yaml
@@ -73,6 +73,8 @@ groups:
     - kdc[0-9]*.open*.org
   kubernetes:
     - opendev-k8s*.opendev.org
+#  letsencrypt:
+#    - TBD
   logstash:
     - logstash[0-9]*.open*.org
   logstash-worker:
diff --git a/playbooks/base.yaml b/playbooks/base.yaml
index 84b113c55b..5d66aeb601 100644
--- a/playbooks/base.yaml
+++ b/playbooks/base.yaml
@@ -91,3 +91,20 @@
   roles:
     - install-docker
     - zuul-preview
+
+# This next section needs to happen in order.  letsencrypt hosts
+# export their TXT authentication records, which are installed onto
+# adns1, and then the hosts verify to issue/renew keys.
+- hosts: "letsencrypt:!disabled"
+  name: "Base: deploy and renew certificates"
+  roles:
+    - letsencrypt-acme-sh-install
+    - letsencrypt-request-certs
+- hosts: "adns:!disabled"
+  name: "Install txt records"
+  roles:
+    - letsencrypt-install-txt-record
+- hosts: "letsencrypt:!disabled"
+  name: "Create certs"
+  roles:
+    - letsencrypt-create-certs
diff --git a/playbooks/roles/letsencrypt-acme-sh-install/README.rst b/playbooks/roles/letsencrypt-acme-sh-install/README.rst
new file mode 100644
index 0000000000..76595607a0
--- /dev/null
+++ b/playbooks/roles/letsencrypt-acme-sh-install/README.rst
@@ -0,0 +1,9 @@
+Install acme.sh client
+
+This makes the `acme.sh <https://github.com/Neilpang/acme.sh>`__
+client available on the host.
+
+Additionally a ``driver.sh`` script is installed to run the
+authentication procedure and parse output.
+
+**Role Variables**
diff --git a/playbooks/roles/letsencrypt-acme-sh-install/files/driver.sh b/playbooks/roles/letsencrypt-acme-sh-install/files/driver.sh
new file mode 100644
index 0000000000..0b84f2ac4d
--- /dev/null
+++ b/playbooks/roles/letsencrypt-acme-sh-install/files/driver.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+ACME_SH=${ACME_SH:-/opt/acme.sh/acme.sh}
+CERT_HOME=${CERT_HOME:-/etc/letsencrypt-certs}
+CHALLENGE_ALIAS_DOMAIN=${CHALLENGE_ALIAS_DOMAIN:-acme.opendev.org.}
+# Set to !0 to use letsencrypt staging rather than production requests
+LETSENCRYPT_STAGING=${LETSENCRYPT_STAGING:-0}
+LOG_FILE=${LOG_FILE:-/var/log/acme.sh/acme.sh.log}
+
+STAGING=""
+if [[ ${LETSENCRYPT_STAGING} != 0 ]]; then
+    STAGING="--staging"
+fi
+
+echo -e  "\n--- start --- ${1} --- $(date -u '+%Y-%m-%dT%k:%M:%S%z') ---" >> ${LOG_FILE}
+
+if [[ ${1} == "issue" ]]; then
+    # Take output like:
+    #  [Thu Feb 14 13:44:37 AEDT 2019] Domain: '_acme-challenge.test.opendev.org'
+    #  [Thu Feb 14 13:44:37 AEDT 2019] TXT value: 'QjkChGcuqD7rl0jN8FNWkWNAISX1Zry_vE-9RxWF2pE'
+    #
+    # and turn it into:
+    #
+    # _acme-challenge.test.opendev.org:QjkChGcuqD7rl0jN8FNWkWNAISX1Zry_vE-9RxWF2pE
+    #
+    # Ansible then splits each line back into a (domain, value) pair.
+    shift;
+    for arg in "$@"; do
+        $ACME_SH ${STAGING} \
+            --cert-home ${CERT_HOME} \
+            --no-color \
+            --yes-I-know-dns-manual-mode-enough-go-ahead-please \
+            --issue \
+            --dns \
+            --challenge-alias ${CHALLENGE_ALIAS_DOMAIN} \
+            $arg 2>&1 | tee -a ${LOG_FILE} | \
+                egrep 'Domain:|TXT value:' | cut -d"'" -f2 | paste -d':' - -
+                # shell magic ^ is
+                #  - extract everything between ' '
+                #  - stick every two lines together, separated by a :
+    done
+elif [[ ${1} == "renew" ]]; then
+    shift;
+    for arg in "$@"; do
+        $ACME_SH ${STAGING} \
+            --cert-home ${CERT_HOME} \
+            --no-color \
+            --yes-I-know-dns-manual-mode-enough-go-ahead-please \
+            --renew \
+            $arg 2>&1 | tee -a ${LOG_FILE}
+    done
+elif [[ ${1} == "selfsign" ]]; then
+    # For testing, simulate the key generation
+    shift;
+    for arg in "$@"; do
+        # TODO(ianw): Set SAN names from the other "-d" arguments?;
+        # it's a pita to parse.
+        {
+            read -r -a domain_array <<< "$arg"
+            domain=${domain_array[1]}
+            mkdir -p ${CERT_HOME}/${domain}
+            cd ${CERT_HOME}/${domain}
+            echo "Creating certs in ${CERT_HOME}/${domain}"
+            openssl genrsa -out ${domain}.key 2048
+            openssl rsa -in ${domain}.key -out ${domain}.key
+            openssl req -sha256 -new -key ${domain}.key -out ${domain}.csr -subj '/CN=localhost'
+            openssl x509 -req -sha256 -days 365 -in ${domain}.csr -signkey ${domain}.key -out ${domain}.cer
+            cp ${domain}.cer fullchain.cer
+        } | tee -a ${LOG_FILE}
+    done
+else
+    echo "Unknown driver arg: $1"
+    exit 1
+fi
+
+echo "--- end   --- $(date -u '+%Y-%m-%dT%k:%M:%S%z') ---" >> ${LOG_FILE}
diff --git a/playbooks/roles/letsencrypt-acme-sh-install/tasks/main.yaml b/playbooks/roles/letsencrypt-acme-sh-install/tasks/main.yaml
new file mode 100644
index 0000000000..cc4ef4ccf1
--- /dev/null
+++ b/playbooks/roles/letsencrypt-acme-sh-install/tasks/main.yaml
@@ -0,0 +1,23 @@
+- name: Install acme.sh client
+  git:
+    repo: https://github.com/Neilpang/acme.sh
+    dest: /opt/acme.sh
+    version: dev
+
+- name: Install driver script
+  copy:
+    src: driver.sh
+    dest: /opt/acme.sh/driver.sh
+    mode: 0755
+
+- name: Setup log directory
+  file:
+    path: /var/log/acme.sh
+    state: directory
+    mode: 0755
+
+- name: Setup log rotation
+  include_role:
+    name: logrotate
+  vars:
+    logrotate_file_name: /var/log/acme.sh/acme.sh.log
\ No newline at end of file
diff --git a/playbooks/roles/letsencrypt-create-certs/README.rst b/playbooks/roles/letsencrypt-create-certs/README.rst
new file mode 100644
index 0000000000..d121aa8d48
--- /dev/null
+++ b/playbooks/roles/letsencrypt-create-certs/README.rst
@@ -0,0 +1,19 @@
+Generate letsencrypt certificates
+
+This must run after the ``letsencrypt-acme-sh-install``,
+``letsencrypt-request-certs`` and ``letsencrypt-install-txt-record``
+roles.  It will run the ``acme.sh`` process to create the certificates
+on the host.
+
+**Role Variables**
+
+.. zuul:rolevar:: letsencrypt_test_only
+
+   If set to True, will locally generate self-signed certificates in
+   the same locations the real script would, instead of contacting
+   letsencrypt.  This is set during gate testing as the
+   authentication tokens are not available.
+
+.. zuul:rolevar:: letsencrypt_certs
+
+   The same variable as described in ``letsencrypt-request-certs``.
diff --git a/playbooks/roles/letsencrypt-create-certs/defaults/main.yaml b/playbooks/roles/letsencrypt-create-certs/defaults/main.yaml
new file mode 100644
index 0000000000..79476abe0a
--- /dev/null
+++ b/playbooks/roles/letsencrypt-create-certs/defaults/main.yaml
@@ -0,0 +1 @@
+letsencrypt_test_only: False
diff --git a/playbooks/roles/letsencrypt-create-certs/tasks/acme.yaml b/playbooks/roles/letsencrypt-create-certs/tasks/acme.yaml
new file mode 100644
index 0000000000..5ee9887ead
--- /dev/null
+++ b/playbooks/roles/letsencrypt-create-certs/tasks/acme.yaml
@@ -0,0 +1,16 @@
+- name: 'Build arguments for letsencrypt acme.sh driver for: {{ item.key }}'
+  set_fact:
+    acme_args: '"{% for domain in item.value %}-d {{ domain }} {% endfor %}"'
+
+- name: 'Run acme.sh driver for {{ item.key }} certificate issue'
+  shell:
+    cmd: |
+      /opt/acme.sh/driver.sh {{ 'selfsign' if letsencrypt_test_only else 'renew' }}  {{ acme_args }}
+  args:
+    chdir: /opt/acme.sh/
+  register: acme_output
+
+- debug:
+    var: acme_output.stdout_lines
+
+# Keys generated!
\ No newline at end of file
diff --git a/playbooks/roles/letsencrypt-create-certs/tasks/main.yaml b/playbooks/roles/letsencrypt-create-certs/tasks/main.yaml
new file mode 100644
index 0000000000..449e2f09c7
--- /dev/null
+++ b/playbooks/roles/letsencrypt-create-certs/tasks/main.yaml
@@ -0,0 +1,13 @@
+# NOTE(ianw): this var is set for the host by the
+# letsencrypt-request-certs role; running this when empty would be a
+# no-op but we might as well skip it if we know this host hasn't
+# requested anything to actually create/renew.
+- name: Check for prerun state
+  fail:
+    msg: "acme_txt_required is not defined; was letsencrypt-request-certs run?"
+  when: acme_txt_required is not defined
+
+- name: Include ACME renewal
+  include_tasks: acme.yaml
+  loop: "{{ query('dict', letsencrypt_certs) }}"
+  when: acme_txt_required | length > 0
diff --git a/playbooks/roles/letsencrypt-install-txt-record/README.rst b/playbooks/roles/letsencrypt-install-txt-record/README.rst
new file mode 100644
index 0000000000..186fa65e3e
--- /dev/null
+++ b/playbooks/roles/letsencrypt-install-txt-record/README.rst
@@ -0,0 +1,19 @@
+Install authentication records for letsencrypt
+
+Install TXT records to the ``acme.opendev.org`` domain.  This role
+runs only on the adns server, and assumes ownership of the
+``/var/lib/bind/zones/acme.opendev.org/zone.db`` file.  After
+installation the nameserver is refreshed.
+
+After this, ``letsencrypt-create-certs`` can run on each host to
+provision the certificates.
+
+**Role Variables**
+
+.. zuul:rolevar:: acme_txt_required
+
+   A per-host list of ``(domain, TXT value)`` pairs to be installed.
+   This is generated in a prior step on each host by the
+   ``letsencrypt-request-certs`` role.
+
+
diff --git a/playbooks/roles/letsencrypt-install-txt-record/tasks/main.yaml b/playbooks/roles/letsencrypt-install-txt-record/tasks/main.yaml
new file mode 100644
index 0000000000..db77d5dd8d
--- /dev/null
+++ b/playbooks/roles/letsencrypt-install-txt-record/tasks/main.yaml
@@ -0,0 +1,35 @@
+- name: Make key list
+  set_fact:
+    acme_txt_keys: []
+
+- name: Build key list
+  set_fact:
+    acme_txt_keys: '{{ acme_txt_keys }} + {{ hostvars[item]["acme_txt_required"] }}'
+  with_inventory_hostnames: letsencrypt
+
+- name: Final list
+  debug:
+    var: acme_txt_keys
+
+# NOTE(ianw): Most of the time, we won't have anything to actually do
+# as we don't have new keys or renewals due.
+- name: Deploy TXT records
+  block:
+    - name: Deploy new zone.db
+      template:
+        src: zone.db.j2
+        dest: /var/lib/bind/zones/acme.opendev.org/zone.db
+
+    - name: debug new file
+      slurp:
+        src: '/var/lib/bind/zones/acme.opendev.org/zone.db'
+      register: bind_zone_result
+    - debug:
+        msg: "{{ bind_zone_result['content'] | b64decode }}"
+
+    - name: Ensure domain is valid
+      shell: named-checkzone acme.opendev.org /var/lib/bind/zones/acme.opendev.org/zone.db
+
+    - name: Reload domain
+      shell: rndc reload acme.opendev.org
+  when: acme_txt_keys | length > 0
\ No newline at end of file
diff --git a/playbooks/roles/letsencrypt-install-txt-record/templates/zone.db.j2 b/playbooks/roles/letsencrypt-install-txt-record/templates/zone.db.j2
new file mode 100644
index 0000000000..a888051fda
--- /dev/null
+++ b/playbooks/roles/letsencrypt-install-txt-record/templates/zone.db.j2
@@ -0,0 +1,17 @@
+; -*- mode: zone -*-
+$ORIGIN acme.opendev.org.
+$TTL 1m
+@               IN      SOA     adns1.opendev.org. hostmaster.opendev.org. (
+                        {{ ansible_date_time.epoch }}  ; serial number unixtime
+                        1h          ; refresh (secondary checks for updates)
+                        10m         ; retry   (secondary retries failed axfr)
+                        10d         ; expire  (secondary ends serving old data)
+                        5m  )       ; min ttl (cache time for failed lookups)
+@               IN      NS      ns1.opendev.org.
+@               IN      NS      ns2.opendev.org.
+
+; NOTE: DO NOT HAND EDIT.  THESE KEYS ARE MANAGED BY ANSIBLE
+
+{% for key in acme_txt_keys %}
+@	IN	TXT	"{{key[1]}}"
+{% endfor %}
diff --git a/playbooks/roles/letsencrypt-request-certs/README.rst b/playbooks/roles/letsencrypt-request-certs/README.rst
new file mode 100644
index 0000000000..181f2e092e
--- /dev/null
+++ b/playbooks/roles/letsencrypt-request-certs/README.rst
@@ -0,0 +1,53 @@
+Request certificates from letsencrypt
+
+The role requests certificates (or renews expiring certificates, which
+is fundamentally the same thing) from letsencrypt for a host.  This
+requires the ``acme.sh`` tool and driver which should have been
+installed by the ``letsencrypt-acme-sh-install`` role.
+
+This role does not create the certificates.  It will request the
+certificates from letsencrypt and populate the authentication data
+into the ``acme_txt_required`` variable.  These values need to be
+installed and activated on the DNS server by the
+``letsencrypt-install-txt-record`` role; the
+``letsencrypt-create-certs`` will then finish the certificate
+provision process.
+
+**Role Variables**
+
+.. zuul:rolevar:: letsencrypt_test_only
+
+   Uses staging, rather than production, requests to letsencrypt.
+
+.. zuul:rolevar:: letsencrypt_certs
+
+   A host wanting a certificate should define a dictionary variable
+   ``letsencrypt_certs``.  Each key in this dictionary is a separate
+   certificate to create (i.e. a host can create multiple separate
+   certificates).  Each key should have a list of hostnames valid for
+   that certificate.  The certificate will be named for the *first*
+   entry.
+
+   For example:
+
+   .. code-block:: yaml
+
+     letsencrypt_certs:
+       main:
+         - hostname01.opendev.org
+         - hostname.opendev.org
+       secondary:
+         - foo.opendev.org
+
+   will ultimately result in two certificates being provisioned on the
+   host in ``/etc/letsencrypt-certs/hostname01.opendev.org`` and
+   ``/etc/letsencrypt-certs/foo.opendev.org``.
+
+   Note that each entry will require a ``CNAME`` pointing the ACME
+   challenge domain to the TXT record that will be created in the
+   signing domain.  For example above, the following records would need
+   to be pre-created::
+
+     _acme-challenge.hostname01.opendev.org.  IN   CNAME  acme.opendev.org.
+     _acme-challenge.hostname.opendev.org.    IN   CNAME  acme.opendev.org.
+     _acme-challenge.foo.opendev.org.         IN   CNAME  acme.opendev.org.
diff --git a/playbooks/roles/letsencrypt-request-certs/defaults/main.yaml b/playbooks/roles/letsencrypt-request-certs/defaults/main.yaml
new file mode 100644
index 0000000000..b62bfec72c
--- /dev/null
+++ b/playbooks/roles/letsencrypt-request-certs/defaults/main.yaml
@@ -0,0 +1 @@
+letsencrypt_test_only: False
\ No newline at end of file
diff --git a/playbooks/roles/letsencrypt-request-certs/tasks/acme.yaml b/playbooks/roles/letsencrypt-request-certs/tasks/acme.yaml
new file mode 100644
index 0000000000..1c4672d4c6
--- /dev/null
+++ b/playbooks/roles/letsencrypt-request-certs/tasks/acme.yaml
@@ -0,0 +1,29 @@
+- name: 'Build arguments for letsencrypt acme.sh driver for: {{ cert.key }}'
+  set_fact:
+    # NOTE(ianw): note the domains are passed in one string (between
+    # ") as it makes argument parsing a little easier in the driver.sh
+    acme_args: '"{% for domain in cert.value %}-d {{ domain }} {% endfor %}"'
+
+- name: Run acme.sh driver for certificate issue
+  shell:
+    cmd: |
+      /opt/acme.sh/driver.sh issue {{ acme_args }}
+  args:
+    chdir: /opt/acme.sh/
+  environment:
+    LETSENCRYPT_STAGING: '{{ "1" if letsencrypt_test_only else "0" }}'
+  register: acme_output
+
+- debug:
+    var: acme_output.stdout_lines
+
+# NOTE(ianw): The output is domain:key which we split into a tuple
+# here.  We don't make use of the domain part ATM; our default CNAME
+# setup points "_acme-challenge.host.opendev.org" to just
+# "acme.opendev.org" so we put all the keys into "top-level" TXT
+# records directly at acme.opendev.org.  letsencrypt doesn't care; it
+# just follows the CNAME and enumerates all the TXT records in
+# acme.opendev.org looking for one that matches.
+- set_fact:
+    acme_txt_required: '{{ acme_txt_required + [(item.split(":")[0], item.split(":")[1])] }}'
+  loop: '{{ acme_output.stdout_lines }}'
diff --git a/playbooks/roles/letsencrypt-request-certs/tasks/main.yaml b/playbooks/roles/letsencrypt-request-certs/tasks/main.yaml
new file mode 100644
index 0000000000..50090e2297
--- /dev/null
+++ b/playbooks/roles/letsencrypt-request-certs/tasks/main.yaml
@@ -0,0 +1,25 @@
+- set_fact:
+    acme_txt_required: []
+
+- name: Show cert list
+  debug:
+    var: letsencrypt_certs
+
+# Handle multiple certs for a single host; like
+#
+# letsencrypt_certs:
+#    main:
+#      - hostname.opendev.org
+#    secondary:
+#      - foo.opendev.org
+#      - baz.opendev.org
+#
+# All required TXT keys are put into acme_txt_required
+
+- include_tasks: acme.yaml
+  loop: "{{ query('dict', letsencrypt_certs) }}"
+  loop_control:
+    loop_var: cert
+
+- debug:
+    var: acme_txt_required
diff --git a/playbooks/zuul/run-base.yaml b/playbooks/zuul/run-base.yaml
index 291221cdbb..372f5c9be5 100644
--- a/playbooks/zuul/run-base.yaml
+++ b/playbooks/zuul/run-base.yaml
@@ -65,7 +65,10 @@
         - group_vars/registry.yaml
         - group_vars/gitea.yaml
         - group_vars/gitea-lb.yaml
+        - group_vars/letsencrypt.yaml
         - host_vars/bridge.openstack.org.yaml
+        - host_vars/letsencrypt01.opendev.org.yaml
+        - host_vars/letsencrypt02.opendev.org.yaml
     - name: Display group membership
       command: ansible localhost -m debug -a 'var=groups'
     - name: Run base.yaml
diff --git a/playbooks/zuul/templates/gate-groups.yaml.j2 b/playbooks/zuul/templates/gate-groups.yaml.j2
index a8c8bfeb05..7f2c0b3b52 100644
--- a/playbooks/zuul/templates/gate-groups.yaml.j2
+++ b/playbooks/zuul/templates/gate-groups.yaml.j2
@@ -10,3 +10,7 @@ groups:
 
   docker:
     - bionic-docker
+
+  letsencrypt:
+    - letsencrypt01.opendev.org
+    - letsencrypt02.opendev.org
diff --git a/playbooks/zuul/templates/group_vars/letsencrypt.yaml.j2 b/playbooks/zuul/templates/group_vars/letsencrypt.yaml.j2
new file mode 100644
index 0000000000..1d315d24e9
--- /dev/null
+++ b/playbooks/zuul/templates/group_vars/letsencrypt.yaml.j2
@@ -0,0 +1,4 @@
+# We don't want CI tests trying to really authenticate against
+# letsencrypt; apart from just being unfriendly it might cause quota
+# issues.
+letsencrypt_test_only: True
diff --git a/playbooks/zuul/templates/host_vars/letsencrypt01.opendev.org.yaml.j2 b/playbooks/zuul/templates/host_vars/letsencrypt01.opendev.org.yaml.j2
new file mode 100644
index 0000000000..f116c5219a
--- /dev/null
+++ b/playbooks/zuul/templates/host_vars/letsencrypt01.opendev.org.yaml.j2
@@ -0,0 +1,7 @@
+letsencrypt_certs:
+  main:
+    - letsencrypt01.opendev.org
+    - letsencrypt.opendev.org
+    - alias.opendev.org
+  secondary:
+    - someotherservice.opendev.org
\ No newline at end of file
diff --git a/playbooks/zuul/templates/host_vars/letsencrypt02.opendev.org.yaml.j2 b/playbooks/zuul/templates/host_vars/letsencrypt02.opendev.org.yaml.j2
new file mode 100644
index 0000000000..9ba000715a
--- /dev/null
+++ b/playbooks/zuul/templates/host_vars/letsencrypt02.opendev.org.yaml.j2
@@ -0,0 +1,4 @@
+letsencrypt_certs:
+  main:
+    - letsencrypt02.opendev.org
+    - letsencrypt.opendev.org
diff --git a/testinfra/test_letsencrypt.py b/testinfra/test_letsencrypt.py
new file mode 100644
index 0000000000..841a5c3da4
--- /dev/null
+++ b/testinfra/test_letsencrypt.py
@@ -0,0 +1,60 @@
+# Copyright 2019 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+import pytest
+
+testinfra_hosts = ['adns-letsencrypt.opendev.org',
+                   'letsencrypt01.opendev.org',
+                   'letsencrypt02.opendev.org']
+
+
+def test_acme_zone(host):
+    if host.backend.get_hostname() != 'adns-letsencrypt.opendev.org':
+        pytest.skip()
+    acme_opendev_zone = host.file('/var/lib/bind/zones/acme.opendev.org/zone.db')
+    assert acme_opendev_zone.exists
+
+    # On our test nodes, unbound is listening on 127.0.0.1:53; this
+    # ensures the query hits bind
+    query_addr = host.ansible("setup")["ansible_facts"]["ansible_default_ipv4"]["address"]
+    cmd = host.run("dig -t txt acme.opendev.org @" + query_addr)
+    count = 0
+    for line in cmd.stdout.split('\n'):
+        if line.startswith('acme.opendev.org.	60	IN	TXT'):
+            count = count + 1
+    if count != 6:
+        # NOTE(ianw): I'm sure there's more pytest-y ways to save this
+        # for debugging ...
+        print(cmd.stdout)
+    assert count == 6, "Did not see required number of TXT records!"
+
+def test_certs_created(host):
+    if host.backend.get_hostname() == 'letsencrypt01.opendev.org':
+        domain_one = host.file(
+            '/etc/letsencrypt-certs/'
+            'letsencrypt01.opendev.org/letsencrypt01.opendev.org.key')
+        assert domain_one.exists
+        domain_two = host.file(
+            '/etc/letsencrypt-certs/'
+            'someotherservice.opendev.org/someotherservice.opendev.org.key')
+        assert domain_two.exists
+
+    elif host.backend.get_hostname() == 'letsencrypt02.opendev.org':
+        domain_one = host.file(
+            '/etc/letsencrypt-certs/'
+            'letsencrypt02.opendev.org/letsencrypt02.opendev.org.key')
+        assert domain_one.exists
+
+    else:
+        pytest.skip()