shipyard/images/airflow/Dockerfile.ubuntu_jammy
Sergiy Markin 86b7d03403 Fix deprecated code
This PS replaces deprecared module pkg_resources,
also fixes some unit tests and fixes the schema
validation by adding specific schema draft to
choose in order to prevent the processor to fall
back to use the latest draft that may potentially
cause issues.

Also this PS bumps kubectl version to 1.32.0 due
to CVE and switched base ubuntu image repo to
quay.io/airshipit

Change-Id: Ie40f179eac83fde4d828e6f63a9c03e473eb3b15
2024-12-26 19:51:19 +00:00

193 lines
6.8 KiB
Docker

# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Docker image to run Airflow on Kubernetes
#
# In order to fix rate limit error
#
# 429 Too Many Requests - Server message: too many requests:
# You have reached your pull rate limit.
# You may increase the limit by authenticating and upgrading: https://www.docker.com/increase-rate-limit
ARG FROM=quay.io/airshipit/ubuntu:jammy
FROM ${FROM}
LABEL org.opencontainers.image.authors='airship-discuss@lists.airshipit.org, irc://#airshipit@freenode' \
org.opencontainers.image.url='https://airshipit.org' \
org.opencontainers.image.documentation='https://airship-shipyard.readthedocs.org' \
org.opencontainers.image.source='https://opendev.org/airship/shipyard' \
org.opencontainers.image.vendor='The Airship Authors' \
org.opencontainers.image.licenses='Apache-2.0'
# Do not prompt user for choices on installation/configuration of packages
# Set port 8080 for Airflow Web
# Set port 5555 for Airflow Flower
# Set port 8793 for Airflow Worker
ENV container docker
ENV WEB_PORT 8080
ENV FLOWER_PORT 5555
ENV WORKER_PORT 8793
ENV SLUGIFY_USES_TEXT_UNIDECODE yes
ENV PYTHONWARNINGS=ignore::DeprecationWarning,ignore::FutureWarning
# Expose port for applications
EXPOSE $WEB_PORT
EXPOSE $FLOWER_PORT
EXPOSE $WORKER_PORT
# Set ARG for usage during build
ARG AIRFLOW_HOME=/usr/local/airflow
# Moved celery to images/airflow/requirements.txt as apache-airflow uses a
# version of celery incompatibile with the version of kombu needed by other
# Airship components
ARG AIRFLOW_SRC="apache-airflow[crypto,celery,hive,hdfs,jdbc,postgres]==2.10.4"
ARG DEBIAN_FRONTEND=noninteractive
ARG ctx_base=src/bin
# Kubectl version
ARG KUBECTL_VERSION=1.32.0
# Needed from apache-airflow 1.10.2, since core.airflow_home config is deprecated
ENV AIRFLOW_HOME=${AIRFLOW_HOME}
RUN set -ex \
&& apt-get update && apt-get upgrade -y \
&& apt-get install -y wget curl \
apt-transport-https ca-certificates gnupg \
&& echo "deb http://apt.postgresql.org/pub/repos/apt jammy-pgdg main" > /etc/apt/sources.list.d/pgdg.list \
&& curl -o /etc/apt/trusted.gpg.d/postgresql_release_signing_key.asc 'https://www.postgresql.org/media/keys/ACCC4CF8.asc' \
&& apt update \
&& apt -y install \
automake \
build-essential \
ca-certificates \
curl \
git \
g++ \
libkrb5-dev \
libffi-dev \
libssl-dev \
libpq-dev \
libpq5 \
libtool \
libxml2 \
libsasl2-dev \
locales \
netcat \
netbase \
postgresql-client \
postgresql-common \
python3 \
python3-setuptools \
python3-pip \
python3-dev \
python3-dateutil \
make \
--no-install-recommends \
&& python3 -m pip install -U pip \
&& apt-get clean \
&& rm -rf \
/var/lib/apt/lists/* \
/tmp/* \
/var/tmp/* \
/usr/share/man \
/usr/share/doc \
/usr/share/doc-base \
/etc/ssl/private/ssl-cert-snakeoil.key \
/etc/ssl/private/ssl-cert-snakeoil.pem
# Install LibYAML
ENV LD_LIBRARY_PATH=/usr/local/lib
ARG LIBYAML_VERSION=0.2.5
RUN set -ex \
&& git clone https://github.com/yaml/libyaml.git \
&& cd libyaml \
&& git checkout $LIBYAML_VERSION \
&& ./bootstrap \
&& ./configure \
&& make \
&& make install \
&& cd .. \
&& rm -fr libyaml
# Things that change mostly infrequently
RUN useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \
&& curl -L -o /usr/local/bin/kubectl \
https://storage.googleapis.com/kubernetes-release/release/v${KUBECTL_VERSION}/bin/linux/amd64/kubectl \
&& chmod +x /usr/local/bin/kubectl
# Dependency requirements
# Note - removing snakebite (python 2 vs. 3). See:
# https://github.com/puckel/docker-airflow/issues/77
# Install Airflow directly to allow overriding source
# COPY images/airflow/requirements.txt /tmp/
COPY ${ctx_base}/shipyard_airflow/requirements-frozen.txt /tmp/requirements.txt
RUN pip3 install -r /tmp/requirements.txt --no-cache-dir \
&& pip3 install $AIRFLOW_SRC --no-cache-dir
# && (pip3 uninstall -y snakebite || true) \
# && (pip3 uninstall -y psycopg2 || true) \
# && (pip3 install --no-cache-dir --force-reinstall $(pip freeze | grep psycopg2-binary) || true)
# Copy scripts used in the container:
COPY images/airflow/script/*.sh ${AIRFLOW_HOME}/
# Copy configuration (e.g. logging config for Airflow):
COPY images/airflow/config/*.py ${AIRFLOW_HOME}/config/
COPY images/airflow/webserver_config.py ${AIRFLOW_HOME}/
# Change permissions
RUN chown -R airflow: ${AIRFLOW_HOME}
# Setting the version explicitly for PBR
ENV PBR_VERSION 0.1a1
# Shipyard
#
# Shipyard provides core functionality used by the Airflow plugins/operators
# Since Shipyard and Airflow are built together as images, this should prevent
# stale or out-of-date code between these parts.
# Shipyard requirements, source and installation
# COPY ${ctx_base}/shipyard_airflow/requirements-frozen.txt /tmp/api_requirements.txt
# RUN pip3 install -r /tmp/api_requirements.txt --no-cache-dir
COPY ${ctx_base}/shipyard_airflow /tmp/shipyard/
RUN cd /tmp/shipyard \
&& pip3 install $(pwd) --use-pep517
# Note: The value for the dags and plugins directories that are sourced
# from the values.yaml of the Shipyard Helm chart need to align with these
# directories. If they do not, airflow will not find the intended dags and
# plugins.
#
# Note: In the case of building images using the provided Makefile, a test is
# run against the built-in dags provided with Airflow. Since there is no Helm
# chart to reconfigure the airflow.cfg with these directories, these dags and
# plugins are not known to Airflow during the image test.
#
# Copy the plugins and dags that will be used by this Airflow image:
COPY ${ctx_base}/shipyard_airflow/shipyard_airflow/plugins ${AIRFLOW_HOME}/plugins/
COPY ${ctx_base}/shipyard_airflow/shipyard_airflow/plugins \
/usr/local/lib/python3.10/dist-packages/airflow/plugins/
COPY ${ctx_base}/shipyard_airflow/shipyard_airflow/dags ${AIRFLOW_HOME}/dags/
COPY ${ctx_base}/shipyard_airflow/shipyard_airflow/dags \
/usr/local/lib/python3.10/dist-packages/airflow/dags/
# Set work directory
USER airflow
WORKDIR ${AIRFLOW_HOME}
# Execute entrypoint
ENTRYPOINT ["./entrypoint.sh"]