diff --git a/bitnami/airflow/2/debian-12/Dockerfile b/bitnami/airflow/2/debian-12/Dockerfile index ede5c877b6bf2..ed679a52fa4af 100644 --- a/bitnami/airflow/2/debian-12/Dockerfile +++ b/bitnami/airflow/2/debian-12/Dockerfile @@ -8,11 +8,11 @@ ARG TARGETARCH LABEL com.vmware.cp.artifact.flavor="sha256:c50c90cfd9d12b445b011e6ad529f1ad3daea45c26d20b00732fae3cd71f6a83" \ org.opencontainers.image.base.name="docker.io/bitnami/minideb:bookworm" \ - org.opencontainers.image.created="2024-10-24T14:03:28Z" \ + org.opencontainers.image.created="2024-10-30T14:53:31Z" \ org.opencontainers.image.description="Application packaged by Broadcom, Inc." \ org.opencontainers.image.documentation="https://github.com/bitnami/containers/tree/main/bitnami/airflow/README.md" \ org.opencontainers.image.licenses="Apache-2.0" \ - org.opencontainers.image.ref.name="2.10.2-debian-12-r3" \ + org.opencontainers.image.ref.name="2.10.2-debian-12-r4" \ org.opencontainers.image.source="https://github.com/bitnami/containers/tree/main/bitnami/airflow" \ org.opencontainers.image.title="airflow" \ org.opencontainers.image.vendor="Broadcom, Inc." 
\ @@ -26,12 +26,11 @@ ENV HOME="/" \ COPY prebuildfs / SHELL ["/bin/bash", "-o", "errexit", "-o", "nounset", "-o", "pipefail", "-c"] # Install required system packages and dependencies -RUN install_packages ca-certificates curl git krb5-user libbsd0 libbz2-1.0 libcom-err2 libcrypt1 libedit2 libffi8 libgcc-s1 libgmp10 libgnutls30 libgss-dev libgssapi-krb5-2 libhogweed6 libicu72 libidn2-0 libk5crypto3 libkeyutils1 libkrb5-3 libkrb5-dev libkrb5support0 libldap-2.5-0 liblz4-1 liblzma5 libmariadb3 libmd0 libncursesw6 libnettle8 libnsl2 libp11-kit0 libreadline8 libsasl2-2 libsasl2-modules libsqlite3-0 libssl3 libstdc++6 libsybdb5 libtasn1-6 libtinfo6 libtirpc3 libudev1 libunistring2 libuuid1 libxml2 libxslt1.1 locales netbase procps tzdata zlib1g +RUN install_packages ca-certificates curl git krb5-user libbz2-1.0 libcom-err2 libcrypt1 libffi8 libgcc-s1 libgmp10 libgnutls30 libgss-dev libgssapi-krb5-2 libhogweed6 libidn2-0 libk5crypto3 libkeyutils1 libkrb5-3 libkrb5-dev libkrb5support0 libldap-2.5-0 liblzma5 libmariadb3 libncursesw6 libnettle8 libnsl2 libp11-kit0 libreadline8 libsasl2-2 libsasl2-modules libsqlite3-0 libssl3 libstdc++6 libsybdb5 libtasn1-6 libtinfo6 libtirpc3 libudev1 libunistring2 locales netbase procps tzdata zlib1g RUN mkdir -p /tmp/bitnami/pkg/cache/ ; cd /tmp/bitnami/pkg/cache/ ; \ COMPONENTS=( \ - "wait-for-port-1.0.8-5-linux-${OS_ARCH}-debian-12" \ - "python-3.12.7-2-linux-${OS_ARCH}-debian-12" \ - "postgresql-client-17.0.0-0-linux-${OS_ARCH}-debian-12" \ + "wait-for-port-1.0.8-6-linux-${OS_ARCH}-debian-12" \ + "python-3.12.7-3-linux-${OS_ARCH}-debian-12" \ "ini-file-1.4.7-6-linux-${OS_ARCH}-debian-12" \ "airflow-2.10.2-1-linux-${OS_ARCH}-debian-12" \ ) ; \ @@ -68,9 +67,9 @@ ENV AIRFLOW_HOME="/opt/bitnami/airflow" \ LNAME="airflow" \ NSS_WRAPPER_GROUP="/opt/bitnami/airflow/nss-wrapper/nss_group" \ NSS_WRAPPER_PASSWD="/opt/bitnami/airflow/nss-wrapper/nss_passwd" \ - 
PATH="/opt/bitnami/common/bin:/opt/bitnami/python/bin:/opt/bitnami/postgresql/bin:/opt/bitnami/airflow/venv/bin:$PATH" + PATH="/opt/bitnami/common/bin:/opt/bitnami/python/bin:/opt/bitnami/airflow/venv/bin:$PATH" -EXPOSE 8080 +EXPOSE 8080 8793 USER 1001 ENTRYPOINT [ "/opt/bitnami/scripts/airflow/entrypoint.sh" ] diff --git a/bitnami/airflow/2/debian-12/prebuildfs/opt/bitnami/.bitnami_components.json b/bitnami/airflow/2/debian-12/prebuildfs/opt/bitnami/.bitnami_components.json index f1e98c51aca62..539d30c4530e5 100644 --- a/bitnami/airflow/2/debian-12/prebuildfs/opt/bitnami/.bitnami_components.json +++ b/bitnami/airflow/2/debian-12/prebuildfs/opt/bitnami/.bitnami_components.json @@ -11,22 +11,16 @@ "type": "NAMI", "version": "1.4.7-6" }, - "postgresql-client": { - "arch": "amd64", - "distro": "debian-12", - "type": "NAMI", - "version": "17.0.0-0" - }, "python": { "arch": "amd64", "distro": "debian-12", "type": "NAMI", - "version": "3.12.7-2" + "version": "3.12.7-3" }, "wait-for-port": { "arch": "amd64", "distro": "debian-12", "type": "NAMI", - "version": "1.0.8-5" + "version": "1.0.8-6" } } \ No newline at end of file diff --git a/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow-env.sh b/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow-env.sh index 79bdf7ad870f0..acb7479eae09d 100644 --- a/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow-env.sh +++ b/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow-env.sh @@ -29,19 +29,21 @@ airflow_env_vars=( AIRFLOW_FIRSTNAME AIRFLOW_LASTNAME AIRFLOW_EMAIL + AIRFLOW_COMPONENT_TYPE AIRFLOW_EXECUTOR AIRFLOW_RAW_FERNET_KEY AIRFLOW_FORCE_OVERWRITE_CONF_FILE AIRFLOW_FERNET_KEY AIRFLOW_SECRET_KEY + AIRFLOW_WEBSERVER_BASE_URL AIRFLOW_WEBSERVER_HOST AIRFLOW_WEBSERVER_PORT_NUMBER AIRFLOW_LOAD_EXAMPLES - AIRFLOW_BASE_URL AIRFLOW_HOSTNAME_CALLABLE AIRFLOW_POOL_NAME AIRFLOW_POOL_SIZE AIRFLOW_POOL_DESC + AIRFLOW_WORKER_QUEUE AIRFLOW_DATABASE_HOST AIRFLOW_DATABASE_PORT_NUMBER 
AIRFLOW_DATABASE_NAME @@ -67,6 +69,8 @@ airflow_env_vars=( AIRFLOW_LDAP_USE_TLS AIRFLOW_LDAP_ALLOW_SELF_SIGNED AIRFLOW_LDAP_TLS_CA_CERTIFICATE + AIRFLOW_BASE_URL + AIRFLOW_QUEUE ) for env_var in "${airflow_env_vars[@]}"; do file_env_var="${env_var}_FILE" @@ -83,15 +87,13 @@ unset airflow_env_vars # Airflow paths export AIRFLOW_BASE_DIR="${BITNAMI_ROOT_DIR}/airflow" -export AIRFLOW_HOME="${AIRFLOW_BASE_DIR}" +export AIRFLOW_HOME="$AIRFLOW_BASE_DIR" export AIRFLOW_BIN_DIR="${AIRFLOW_BASE_DIR}/venv/bin" export AIRFLOW_LOGS_DIR="${AIRFLOW_BASE_DIR}/logs" export AIRFLOW_SCHEDULER_LOGS_DIR="${AIRFLOW_LOGS_DIR}/scheduler" -export AIRFLOW_LOG_FILE="${AIRFLOW_LOGS_DIR}/airflow-webserver.log" export AIRFLOW_CONF_FILE="${AIRFLOW_BASE_DIR}/airflow.cfg" export AIRFLOW_WEBSERVER_CONF_FILE="${AIRFLOW_BASE_DIR}/webserver_config.py" export AIRFLOW_TMP_DIR="${AIRFLOW_BASE_DIR}/tmp" -export AIRFLOW_PID_FILE="${AIRFLOW_TMP_DIR}/airflow-webserver.pid" export AIRFLOW_DAGS_DIR="${AIRFLOW_BASE_DIR}/dags" export PATH="${AIRFLOW_BIN_DIR}:${BITNAMI_ROOT_DIR}/common/bin:${PATH}" @@ -107,19 +109,23 @@ export AIRFLOW_LASTNAME="${AIRFLOW_LASTNAME:-Lastname}" export AIRFLOW_EMAIL="${AIRFLOW_EMAIL:-user@example.com}" # Airflow configuration +export AIRFLOW_COMPONENT_TYPE="${AIRFLOW_COMPONENT_TYPE:-webserver}" export AIRFLOW_EXECUTOR="${AIRFLOW_EXECUTOR:-SequentialExecutor}" export AIRFLOW_RAW_FERNET_KEY="${AIRFLOW_RAW_FERNET_KEY:-}" export AIRFLOW_FORCE_OVERWRITE_CONF_FILE="${AIRFLOW_FORCE_OVERWRITE_CONF_FILE:-no}" export AIRFLOW_FERNET_KEY="${AIRFLOW_FERNET_KEY:-}" export AIRFLOW_SECRET_KEY="${AIRFLOW_SECRET_KEY:-}" +AIRFLOW_WEBSERVER_BASE_URL="${AIRFLOW_WEBSERVER_BASE_URL:-"${AIRFLOW_BASE_URL:-}"}" +export AIRFLOW_WEBSERVER_BASE_URL="${AIRFLOW_WEBSERVER_BASE_URL:-}" export AIRFLOW_WEBSERVER_HOST="${AIRFLOW_WEBSERVER_HOST:-127.0.0.1}" export AIRFLOW_WEBSERVER_PORT_NUMBER="${AIRFLOW_WEBSERVER_PORT_NUMBER:-8080}" export AIRFLOW_LOAD_EXAMPLES="${AIRFLOW_LOAD_EXAMPLES:-yes}" -export 
AIRFLOW_BASE_URL="${AIRFLOW_BASE_URL:-}" export AIRFLOW_HOSTNAME_CALLABLE="${AIRFLOW_HOSTNAME_CALLABLE:-}" export AIRFLOW_POOL_NAME="${AIRFLOW_POOL_NAME:-}" export AIRFLOW_POOL_SIZE="${AIRFLOW_POOL_SIZE:-}" export AIRFLOW_POOL_DESC="${AIRFLOW_POOL_DESC:-}" +AIRFLOW_WORKER_QUEUE="${AIRFLOW_WORKER_QUEUE:-"${AIRFLOW_QUEUE:-}"}" +export AIRFLOW_WORKER_QUEUE="${AIRFLOW_WORKER_QUEUE:-}" # Airflow database configuration export AIRFLOW_DATABASE_HOST="${AIRFLOW_DATABASE_HOST:-postgresql}" diff --git a/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow/entrypoint.sh b/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow/entrypoint.sh index 3a097862ab6f9..d1d30359d06c5 100755 --- a/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow/entrypoint.sh +++ b/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow/entrypoint.sh @@ -36,7 +36,6 @@ fi if [[ "$*" = *"/opt/bitnami/scripts/airflow/run.sh"* || "$*" = *"/run.sh"* ]]; then info "** Starting Airflow setup **" - /opt/bitnami/scripts/postgresql-client/setup.sh /opt/bitnami/scripts/airflow/setup.sh info "** Airflow setup finished! **" fi diff --git a/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow/run.sh b/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow/run.sh index f67a342f9b312..7d9ca26e34bf7 100755 --- a/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow/run.sh +++ b/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow/run.sh @@ -16,11 +16,17 @@ set -o pipefail . /opt/bitnami/scripts/libos.sh . 
/opt/bitnami/scripts/libairflow.sh -args=("--pid" "$AIRFLOW_PID_FILE" "$@") +command=("$AIRFLOW_COMPONENT_TYPE") +args=("--pid" "${AIRFLOW_TMP_DIR}/airflow-${AIRFLOW_COMPONENT_TYPE}.pid" "$@") +if [[ "$AIRFLOW_COMPONENT_TYPE" = "worker" ]]; then + command=("celery" "worker") + [[ -n "$AIRFLOW_WORKER_QUEUE" ]] && args+=("-q" "$AIRFLOW_WORKER_QUEUE") + am_i_root && export C_FORCE_ROOT="true" +fi info "** Starting Airflow **" if am_i_root; then - exec_as_user "$AIRFLOW_DAEMON_USER" "${AIRFLOW_BIN_DIR}/airflow" "webserver" "${args[@]}" + exec_as_user "$AIRFLOW_DAEMON_USER" "${AIRFLOW_BIN_DIR}/airflow" "${command[@]}" "${args[@]}" else - exec "${AIRFLOW_BIN_DIR}/airflow" "webserver" "${args[@]}" + exec "${AIRFLOW_BIN_DIR}/airflow" "${command[@]}" "${args[@]}" fi diff --git a/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow/setup.sh b/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow/setup.sh index 4adc5cf9038dd..0361f526768e8 100755 --- a/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow/setup.sh +++ b/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/airflow/setup.sh @@ -12,13 +12,6 @@ set -o pipefail # Load Airflow environment variables . /opt/bitnami/scripts/airflow-env.sh -# Load PostgreSQL Client environment for 'postgresql_remote_execute' (after 'airflow-env.sh' so that MODULE is not set to a wrong value) -if [[ -f /opt/bitnami/scripts/postgresql-client-env.sh ]]; then - . /opt/bitnami/scripts/postgresql-client-env.sh -elif [[ -f /opt/bitnami/scripts/postgresql-env.sh ]]; then - . /opt/bitnami/scripts/postgresql-env.sh -fi - # Load libraries . /opt/bitnami/scripts/libos.sh . 
/opt/bitnami/scripts/libfs.sh diff --git a/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/libairflow.sh b/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/libairflow.sh index 6a23f34bedde2..4357ccd7ed93c 100644 --- a/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/libairflow.sh +++ b/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/libairflow.sh @@ -15,13 +15,6 @@ . /opt/bitnami/scripts/libvalidations.sh . /opt/bitnami/scripts/libpersistence.sh -# Load database library -if [[ -f /opt/bitnami/scripts/libpostgresqlclient.sh ]]; then - . /opt/bitnami/scripts/libpostgresqlclient.sh -elif [[ -f /opt/bitnami/scripts/libpostgresql.sh ]]; then - . /opt/bitnami/scripts/libpostgresql.sh -fi - # Functions ######################## @@ -42,57 +35,106 @@ airflow_validate() { error_code=1 } + check_allowed_port() { + local validate_port_args=() + ! am_i_root && validate_port_args+=("-unprivileged") + validate_port_args+=("${!1}") + if ! err=$(validate_port "${validate_port_args[@]}"); then + print_validation_error "An invalid port was specified in the environment variable $1: $err" + fi + } + + check_resolved_hostname() { + if ! is_hostname_resolved "$1"; then + warn "Hostname $1 could not be resolved. 
This could lead to connection issues" + fi + } + check_multi_value() { if [[ " ${2} " != *" ${!1} "* ]]; then print_validation_error "The allowed values for ${1} are: ${2}" fi } - # Check postgresql host - [[ -z "$AIRFLOW_DATABASE_HOST" ]] && print_validation_error "Missing AIRFLOW_DATABASE_HOST" - - # Check LDAP parameters - if is_boolean_yes "$AIRFLOW_LDAP_ENABLE"; then - [[ -z "$AIRFLOW_LDAP_URI" ]] && print_validation_error "Missing AIRFLOW_LDAP_URI" - [[ -z "$AIRFLOW_LDAP_SEARCH" ]] && print_validation_error "Missing AIRFLOW_LDAP_SEARCH" - [[ -z "$AIRFLOW_LDAP_UID_FIELD" ]] && print_validation_error "Missing AIRFLOW_LDAP_UID_FIELD" - [[ -z "$AIRFLOW_LDAP_BIND_USER" ]] && print_validation_error "Missing AIRFLOW_LDAP_BIND_USER" - [[ -z "$AIRFLOW_LDAP_BIND_PASSWORD" ]] && print_validation_error "Missing AIRFLOW_LDAP_BIND_PASSWORD" - [[ -z "$AIRFLOW_LDAP_ROLES_MAPPING" ]] && print_validation_error "Missing AIRFLOW_LDAP_ROLES_MAPPING" - [[ -z "$AIRFLOW_LDAP_ROLES_SYNC_AT_LOGIN" ]] && print_validation_error "Missing AIRFLOW_LDAP_ROLES_SYNC_AT_LOGIN" - [[ -z "$AIRFLOW_LDAP_USER_REGISTRATION" ]] && print_validation_error "Missing AIRFLOW_LDAP_USER_REGISTRATION" - [[ -z "$AIRFLOW_LDAP_USER_REGISTRATION_ROLE" ]] && print_validation_error "Missing AIRFLOW_LDAP_USER_REGISTRATION_ROLE" - - # Chack boolean env vars contain valid values - for var in "AIRFLOW_LDAP_USER_REGISTRATION" "AIRFLOW_LDAP_ROLES_SYNC_AT_LOGIN" "AIRFLOW_LDAP_USE_TLS"; do - check_multi_value "$var" "True False" - done + check_positive_value() { + if ! is_positive_int "${!1}"; then + print_validation_error "The variable $1 must be positive integer" + fi + } - if [[ "$AIRFLOW_LDAP_USE_TLS" == "True" ]]; then - [[ -z "$AIRFLOW_LDAP_ALLOW_SELF_SIGNED" ]] && print_validation_error "Missing AIRFLOW_LDAP_ALLOW_SELF_SIGNED" - [[ -z "$AIRFLOW_LDAP_TLS_CA_CERTIFICATE" ]] && print_validation_error "Missing AIRFLOW_LDAP_TLS_CA_CERTIFICATE" + check_yes_no_value() { + if ! 
is_yes_no_value "${!1}"; then + print_validation_error "The allowed values for $1 are [yes, no]" fi + } - fi + check_empty_value() { + if is_empty_value "${!1}"; then + print_validation_error "The $1 environment variable is empty or not set." + fi + } - # Check pool parameters - if [[ -n "$AIRFLOW_POOL_NAME" ]]; then - [[ -z "$AIRFLOW_POOL_DESC" ]] && print_validation_error "Provided AIRFLOW_POOL_NAME but missing AIRFLOW_POOL_DESC" - [[ -z "$AIRFLOW_POOL_SIZE" ]] && print_validation_error "Provided AIRFLOW_POOL_NAME but missing AIRFLOW_POOL_SIZE" - fi + # Check component type & executor + check_empty_value "AIRFLOW_COMPONENT_TYPE" + check_multi_value "AIRFLOW_COMPONENT_TYPE" "webserver scheduler worker" + check_empty_value "AIRFLOW_EXECUTOR" # Check cryptography parameters if [[ -n "$AIRFLOW_RAW_FERNET_KEY" && -z "$AIRFLOW_FERNET_KEY" ]]; then - local fernet_char_count - fernet_char_count="$(echo -n "$AIRFLOW_RAW_FERNET_KEY")" - if [[ "$fernet_char_count" -lt 32 ]]; then + if [[ "${#AIRFLOW_RAW_FERNET_KEY}" -lt 32 ]]; then print_validation_error "AIRFLOW_RAW_FERNET_KEY must have at least 32 characters" - elif [[ "$fernet_char_count" -gt 32 ]]; then + elif [[ "${#AIRFLOW_RAW_FERNET_KEY}" -gt 32 ]]; then warn "AIRFLOW_RAW_FERNET_KEY has more than 32 characters, the rest will be ignored" fi AIRFLOW_FERNET_KEY="$(echo -n "${AIRFLOW_RAW_FERNET_KEY:0:32}" | base64)" fi + # Check database host and port number + check_empty_value "AIRFLOW_DATABASE_HOST" + check_resolved_hostname "$AIRFLOW_DATABASE_HOST" + check_positive_value AIRFLOW_DATABASE_PORT_NUMBER + check_positive_value REDIS_PORT_NUMBER + if [[ "$AIRFLOW_EXECUTOR" == "CeleryExecutor" || "$AIRFLOW_EXECUTOR" == "CeleryKubernetesExecutor" ]]; then + check_empty_value "REDIS_HOST" + check_resolved_hostname "$REDIS_HOST" + fi + + case "$AIRFLOW_COMPONENT_TYPE" in + webserver) + # Check webserver port number + check_allowed_port AIRFLOW_WEBSERVER_PORT_NUMBER + + # Check LDAP parameters +
check_yes_no_value "AIRFLOW_LDAP_ENABLE" + if is_boolean_yes "$AIRFLOW_LDAP_ENABLE"; then + for var in "AIRFLOW_LDAP_URI" "AIRFLOW_LDAP_SEARCH" "AIRFLOW_LDAP_UID_FIELD" "AIRFLOW_LDAP_BIND_USER" "AIRFLOW_LDAP_BIND_PASSWORD" "AIRFLOW_LDAP_ROLES_MAPPING" "AIRFLOW_LDAP_ROLES_SYNC_AT_LOGIN" "AIRFLOW_LDAP_USER_REGISTRATION" "AIRFLOW_LDAP_USER_REGISTRATION_ROLE"; do + check_empty_value "$var" + done + for var in "AIRFLOW_LDAP_USER_REGISTRATION" "AIRFLOW_LDAP_ROLES_SYNC_AT_LOGIN" "AIRFLOW_LDAP_USE_TLS"; do + check_multi_value "$var" "True False" + done + if is_boolean_yes "$AIRFLOW_LDAP_USE_TLS"; then + for var in "AIRFLOW_LDAP_ALLOW_SELF_SIGNED" "AIRFLOW_LDAP_TLS_CA_CERTIFICATE"; do + check_empty_value "$var" + done + fi + fi + + # Check pool parameters + if [[ -n "$AIRFLOW_POOL_NAME" ]]; then + for var in "AIRFLOW_POOL_DESC" "AIRFLOW_POOL_SIZE"; do + check_empty_value "$var" + done + fi + ;; + scheduler|worker) + # Check webserver host and port number + check_empty_value "AIRFLOW_WEBSERVER_HOST" + check_resolved_hostname "$AIRFLOW_WEBSERVER_HOST" + check_positive_value AIRFLOW_WEBSERVER_PORT_NUMBER + ;; + esac + return "$error_code" } @@ -109,7 +151,7 @@ airflow_initialize() { info "Initializing Airflow ..." # Change permissions if running as root - for dir in "$AIRFLOW_TMP_DIR" "$AIRFLOW_LOGS_DIR" "$AIRFLOW_DAGS_DIR"; do + for dir in "$AIRFLOW_TMP_DIR" "$AIRFLOW_LOGS_DIR" "$AIRFLOW_SCHEDULER_LOGS_DIR" "$AIRFLOW_DAGS_DIR"; do ensure_dir_exists "$dir" am_i_root && chown "$AIRFLOW_DAEMON_USER:$AIRFLOW_DAEMON_GROUP" "$dir" done @@ -124,23 +166,37 @@ airflow_initialize() { info "Trying to connect to the database server" airflow_wait_for_postgresql_connection - # Check if the Airflow database has been already initialized - if !
airflow_execute db check-migrations; then - # Delete pid file - rm -f "$AIRFLOW_PID_FILE" - - # Initialize database - info "Populating database" - airflow_execute db init - airflow_create_admin_user - airflow_create_pool - else - # Upgrade database - info "Upgrading database schema" - airflow_execute db upgrade - true # Avoid return false when I am not root - fi + case "$AIRFLOW_COMPONENT_TYPE" in + webserver) + # Check if the Airflow database has been already initialized + if ! airflow_execute db check-migrations; then + # Remove pid file if exists to prevent error after WSL restarts + if [[ -f "${AIRFLOW_TMP_DIR}/airflow-webserver.pid" ]]; then + rm "${AIRFLOW_TMP_DIR}/airflow-webserver.pid" + fi + + # Initialize database + info "Populating database" + airflow_execute db init + + airflow_create_admin_user + airflow_create_pool + else + # Upgrade database + info "Upgrading database schema" + airflow_execute db upgrade + true # Avoid return false when I am not root + fi + ;; + scheduler|worker) + info "Waiting for Airflow Webserver to be up" + airflow_wait_for_webserver "$AIRFLOW_WEBSERVER_HOST" "$AIRFLOW_WEBSERVER_PORT_NUMBER" + if [[ "$AIRFLOW_EXECUTOR" == "CeleryExecutor" || "$AIRFLOW_EXECUTOR" == "CeleryKubernetesExecutor" ]]; then + wait-for-port --host "$REDIS_HOST" "$REDIS_PORT_NUMBER" + fi + ;; + esac } ######################## @@ -184,34 +240,44 @@ airflow_execute() { # None ######################### airflow_generate_config() { - # Create Airflow confirguration from default files - [[ ! -f "$AIRFLOW_CONF_FILE" ]] && cp "$(find "$AIRFLOW_BASE_DIR" -name default_airflow.cfg)" "$AIRFLOW_CONF_FILE" - [[ -n "$AIRFLOW_WEBSERVER_CONF_FILE" ]] && [[ ! -f "$AIRFLOW_WEBSERVER_CONF_FILE" ]] && cp "$(find "$AIRFLOW_BASE_DIR" -name default_webserver_config.py)" "$AIRFLOW_WEBSERVER_CONF_FILE" + case "$AIRFLOW_COMPONENT_TYPE" in + webserver) + # Create Airflow configuration from default files + [[ ! 
-f "$AIRFLOW_CONF_FILE" ]] && cp "$(find "$AIRFLOW_BASE_DIR" -name default_airflow.cfg)" "$AIRFLOW_CONF_FILE" + [[ ! -f "$AIRFLOW_WEBSERVER_CONF_FILE" ]] && cp "$(find "$AIRFLOW_BASE_DIR" -name default_webserver_config.py)" "$AIRFLOW_WEBSERVER_CONF_FILE" + # Setup Airflow webserver base URL + airflow_configure_webserver_base_url + # Configure Airflow webserver authentication + airflow_configure_webserver_authentication + ;; + scheduler|worker) + # Generate Airflow default files + debug_execute airflow version + ;; + esac - # Setup Airflow base URL - airflow_configure_base_url + # Configure the Webserver port + airflow_conf_set "webserver" "web_server_port" "$AIRFLOW_WEBSERVER_PORT_NUMBER" # Configure Airflow Hostname [[ -n "$AIRFLOW_HOSTNAME_CALLABLE" ]] && airflow_conf_set "core" "hostname_callable" "$AIRFLOW_HOSTNAME_CALLABLE" - # Configure Airflow webserver authentication - airflow_configure_webserver_authentication - # Configure Airflow to load examples - if is_boolean_yes "$AIRFLOW_LOAD_EXAMPLES"; then - airflow_conf_set "core" "load_examples" "True" - else - airflow_conf_set "core" "load_examples" "False" - fi # Configure Airflow database airflow_configure_database - # Configure the Webserver port - airflow_conf_set "webserver" "web_server_port" "$AIRFLOW_WEBSERVER_PORT_NUMBER" - # Setup the secret keys for database connection and flask application (fernet key and secret key) # ref: https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#fernet-key # ref: https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html#secret-key [[ -n "$AIRFLOW_FERNET_KEY" ]] && airflow_conf_set "core" "fernet_key" "$AIRFLOW_FERNET_KEY" [[ -n "$AIRFLOW_SECRET_KEY" ]] && airflow_conf_set "webserver" "secret_key" "$AIRFLOW_SECRET_KEY" + if [[ "$AIRFLOW_COMPONENT_TYPE" != "worker" ]]; then + # Configure Airflow to load examples + if is_boolean_yes "$AIRFLOW_LOAD_EXAMPLES"; then + airflow_conf_set "core" "load_examples" "True" + else + 
airflow_conf_set "core" "load_examples" "False" + fi + fi + # Configure Airflow executor airflow_conf_set "core" "executor" "$AIRFLOW_EXECUTOR" [[ "$AIRFLOW_EXECUTOR" == "CeleryExecutor" || "$AIRFLOW_EXECUTOR" == "CeleryKubernetesExecutor" ]] && airflow_configure_celery_executor @@ -237,7 +303,7 @@ airflow_conf_set() { } ######################## -# Configure Airflow base url +# Configure Airflow webserver base url # Globals: # AIRFLOW_* # Arguments: @@ -245,11 +311,11 @@ airflow_conf_set() { # Returns: # None ######################### -airflow_configure_base_url() { - if [[ -z "$AIRFLOW_BASE_URL" ]]; then +airflow_configure_webserver_base_url() { + if [[ -z "$AIRFLOW_WEBSERVER_BASE_URL" ]]; then airflow_conf_set "webserver" "base_url" "http://${AIRFLOW_WEBSERVER_HOST}:${AIRFLOW_WEBSERVER_PORT_NUMBER}" else - airflow_conf_set "webserver" "base_url" "$AIRFLOW_BASE_URL" + airflow_conf_set "webserver" "base_url" "$AIRFLOW_WEBSERVER_BASE_URL" fi } @@ -346,6 +412,27 @@ airflow_webserver_conf_set() { fi } +######################## +# Wait for Airflow Webserver +# Globals: +# AIRFLOW_* +# Arguments: +# $1 - Webserver host, $2 - Webserver port +# Returns: +# None +######################### +airflow_wait_for_webserver() { + local -r webserver_host="${1:?missing webserver host}" + local -r webserver_port="${2:?missing webserver port}" + check_webserver_connection() { + wait-for-port --host "$webserver_host" "$webserver_port" + } + if !
retry_while "check_webserver_connection"; then + error "Could not connect to the Airflow webserver" + return 1 + fi +} + ######################## # Configure Airflow database # Globals: @@ -470,7 +557,7 @@ airflow_create_pool() { ######################## # Check if Airflow is running # Globals: -# AIRFLOW_PID_FILE +# AIRFLOW_TMP_DIR # Arguments: # None # Returns: @@ -478,7 +565,7 @@ airflow_create_pool() { ######################## is_airflow_running() { local pid - pid="$(get_pid_from_file "$AIRFLOW_PID_FILE")" + pid="$(get_pid_from_file "${AIRFLOW_TMP_DIR}/airflow-webserver.pid")" if [[ -n "$pid" ]]; then is_service_running "$pid" else @@ -489,7 +576,7 @@ is_airflow_running() { ######################## # Check if Airflow is not running # Globals: -# AIRFLOW_PID_FILE +# AIRFLOW_TMP_DIR # Arguments: # None # Returns: @@ -502,7 +589,7 @@ is_airflow_not_running() { ######################## # Stop Airflow # Globals: -# AIRFLOW* +# AIRFLOW_TMP_DIR # Arguments: # None # Returns: @@ -510,7 +597,7 @@ is_airflow_not_running() { ######################### airflow_stop() { info "Stopping Airflow..." - stop_service_using_pid "$AIRFLOW_PID_FILE" + stop_service_using_pid "${AIRFLOW_TMP_DIR}/airflow-webserver.pid" } ######################## diff --git a/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/libpostgresqlclient.sh b/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/libpostgresqlclient.sh deleted file mode 100644 index 65238dfff60bc..0000000000000 --- a/bitnami/airflow/2/debian-12/rootfs/opt/bitnami/scripts/libpostgresqlclient.sh +++ /dev/null @@ -1,424 +0,0 @@ -#!/bin/bash -# Copyright Broadcom, Inc. All Rights Reserved. -# SPDX-License-Identifier: APACHE-2.0 -# -# Bitnami PostgreSQL Client library - -# shellcheck disable=SC1091 - -# Load Generic Libraries -. /opt/bitnami/scripts/liblog.sh -. /opt/bitnami/scripts/libos.sh -. 
/opt/bitnami/scripts/libvalidations.sh - -######################## -# Validate settings in POSTGRESQL_CLIENT_* environment variables -# Globals: -# POSTGRESQL_CLIENT_* -# Arguments: -# None -# Returns: -# None -######################### -postgresql_client_validate() { - info "Validating settings in POSTGRESQL_CLIENT_* env vars" - local error_code=0 - - # Auxiliary functions - print_validation_error() { - error "$1" - error_code=1 - } - - empty_password_enabled_warn() { - warn "You set the environment variable ALLOW_EMPTY_PASSWORD=${ALLOW_EMPTY_PASSWORD}. For safety reasons, do not use this flag in a production environment." - } - empty_password_error() { - print_validation_error "The $1 environment variable is empty or not set. Set the environment variable ALLOW_EMPTY_PASSWORD=yes to allow the container to be started with blank passwords. This is recommended only for development." - } - - # Only validate environment variables if any action needs to be performed - local -a database_names - read -r -a database_names <<< "$(tr ',;' ' ' <<< "$POSTGRESQL_CLIENT_CREATE_DATABASE_NAMES")" - if [[ -n "$POSTGRESQL_CLIENT_CREATE_DATABASE_USERNAME" || "${#database_names[@]}" -gt 0 ]]; then - if is_boolean_yes "$ALLOW_EMPTY_PASSWORD"; then - empty_password_enabled_warn - else - if [[ -z "$POSTGRESQL_CLIENT_POSTGRES_PASSWORD" ]]; then - empty_password_error "POSTGRESQL_CLIENT_POSTGRES_PASSWORD" - fi - if [[ -n "$POSTGRESQL_CLIENT_CREATE_DATABASE_USERNAME" ]] && [[ -z "$POSTGRESQL_CLIENT_CREATE_DATABASE_PASSWORD" ]]; then - empty_password_error "POSTGRESQL_CLIENT_CREATE_DATABASE_PASSWORD" - fi - fi - fi - # When enabling extensions, the DB name must be provided - local -a extensions - read -r -a extensions <<< "$(tr ',;' ' ' <<< "$POSTGRESQL_CLIENT_CREATE_DATABASE_EXTENSIONS")" - if [[ "${#database_names[@]}" -le 0 && "${#extensions[@]}" -gt 0 ]]; then - print_validation_error "POSTGRESQL_CLIENT_CREATE_DATABASE_EXTENSIONS requires POSTGRESQL_CLIENT_CREATE_DATABASE_NAMES to be 
set." - fi - return "$error_code" -} - -######################## -# Perform actions to a database -# Globals: -# POSTGRESQL_CLIENT_* -# Arguments: -# None -# Returns: -# None -######################### -postgresql_client_initialize() { - local -a database_names - read -r -a database_names <<< "$(tr ',;' ' ' <<< "$POSTGRESQL_CLIENT_CREATE_DATABASE_NAMES")" - # Wait for the database to be accessible if any action needs to be performed - if [[ -n "$POSTGRESQL_CLIENT_CREATE_DATABASE_USERNAME" || "${#database_names[@]}" -gt 0 ]]; then - info "Trying to connect to the database server" - check_postgresql_connection() { - echo "SELECT 1" | postgresql_remote_execute "$POSTGRESQL_CLIENT_DATABASE_HOST" "$POSTGRESQL_CLIENT_DATABASE_PORT_NUMBER" "postgres" "$POSTGRESQL_CLIENT_POSTGRES_USER" "$POSTGRESQL_CLIENT_POSTGRES_PASSWORD" - } - if ! retry_while "check_postgresql_connection"; then - error "Could not connect to the database server" - return 1 - fi - fi - # Ensure a database user exists in the server - if [[ -n "$POSTGRESQL_CLIENT_CREATE_DATABASE_USERNAME" ]]; then - info "Creating database user ${POSTGRESQL_CLIENT_CREATE_DATABASE_USERNAME}" - local -a args=("$POSTGRESQL_CLIENT_CREATE_DATABASE_USERNAME" "--host" "$POSTGRESQL_CLIENT_DATABASE_HOST" "--port" "$POSTGRESQL_CLIENT_DATABASE_PORT_NUMBER") - [[ -n "$POSTGRESQL_CLIENT_CREATE_DATABASE_PASSWORD" ]] && args+=("-p" "$POSTGRESQL_CLIENT_CREATE_DATABASE_PASSWORD") - postgresql_ensure_user_exists "${args[@]}" - fi - # Ensure a database exists in the server (and that the user has write privileges, if specified) - if [[ "${#database_names[@]}" -gt 0 ]]; then - local -a createdb_args extensions - read -r -a extensions <<< "$(tr ',;' ' ' <<< "$POSTGRESQL_CLIENT_CREATE_DATABASE_EXTENSIONS")" - for database_name in "${database_names[@]}"; do - info "Creating database ${database_name}" - createdb_args=("$database_name" "--host" "$POSTGRESQL_CLIENT_DATABASE_HOST" "--port" "$POSTGRESQL_CLIENT_DATABASE_PORT_NUMBER") - [[ -n 
"$POSTGRESQL_CLIENT_CREATE_DATABASE_USERNAME" ]] && createdb_args+=("-u" "$POSTGRESQL_CLIENT_CREATE_DATABASE_USERNAME") - postgresql_ensure_database_exists "${createdb_args[@]}" - # Ensure the list of extensions are enabled in the specified database - if [[ "${#extensions[@]}" -gt 0 ]]; then - for extension_to_create in "${extensions[@]}"; do - echo "CREATE EXTENSION IF NOT EXISTS ${extension_to_create}" | postgresql_remote_execute "$POSTGRESQL_CLIENT_DATABASE_HOST" "$POSTGRESQL_CLIENT_DATABASE_PORT_NUMBER" "$database_name" "$POSTGRESQL_CLIENT_POSTGRES_USER" "$POSTGRESQL_CLIENT_POSTGRES_PASSWORD" - done - fi - done - fi - # Execute a custom SQL script - if [[ -n "$POSTGRESQL_CLIENT_EXECUTE_SQL" ]]; then - info "Executing custom SQL script" - echo "$POSTGRESQL_CLIENT_EXECUTE_SQL" | postgresql_remote_execute "$POSTGRESQL_CLIENT_DATABASE_HOST" "$POSTGRESQL_CLIENT_DATABASE_PORT_NUMBER" "postgres" "$POSTGRESQL_CLIENT_POSTGRES_USER" "$POSTGRESQL_CLIENT_POSTGRES_PASSWORD" - fi - # Avoid exit code of previous commands to affect the result of this function - true -} - -# Copyright Broadcom, Inc. All Rights Reserved. 
-# SPDX-License-Identifier: APACHE-2.0 - -# shellcheck disable=SC2148 - -######################## -# Return PostgreSQL major version -# Globals: -# POSTGRESQL_* -# Arguments: -# None -# Returns: -# String -######################### -postgresql_get_major_version() { - psql --version | grep -oE "[0-9]+\.[0-9]+" | grep -oE "^[0-9]+" -} - -######################## -# Gets an environment variable name based on the suffix -# Arguments: -# $1 - environment variable suffix -# Returns: -# environment variable name -######################### -get_env_var_value() { - local env_var_suffix="${1:?missing suffix}" - local env_var_name - for env_var_prefix in POSTGRESQL POSTGRESQL_CLIENT; do - env_var_name="${env_var_prefix}_${env_var_suffix}" - if [[ -n "${!env_var_name:-}" ]]; then - echo "${!env_var_name}" - break - fi - done -} - -######################## -# Execute an arbitrary query/queries against the running PostgreSQL service and print the output -# Stdin: -# Query/queries to execute -# Globals: -# BITNAMI_DEBUG -# POSTGRESQL_* -# Arguments: -# $1 - Database where to run the queries -# $2 - User to run queries -# $3 - Password -# $4 - Extra options (eg. -tA) -# Returns: -# None -######################### -postgresql_execute_print_output() { - local -r db="${1:-}" - local -r user="${2:-postgres}" - local -r pass="${3:-}" - local opts - read -r -a opts <<<"${@:4}" - - local args=("-U" "$user" "-p" "${POSTGRESQL_PORT_NUMBER:-5432}" "-h" "127.0.0.1") - [[ -n "$db" ]] && args+=("-d" "$db") - [[ "${#opts[@]}" -gt 0 ]] && args+=("${opts[@]}") - - # Execute the Query/queries from stdin - PGPASSWORD=$pass psql "${args[@]}" -} - -######################## -# Execute an arbitrary query/queries against the running PostgreSQL service -# Stdin: -# Query/queries to execute -# Globals: -# BITNAMI_DEBUG -# POSTGRESQL_* -# Arguments: -# $1 - Database where to run the queries -# $2 - User to run queries -# $3 - Password -# $4 - Extra options (eg. 
-tA) -# Returns: -# None -######################### -postgresql_execute() { - if [[ "${BITNAMI_DEBUG:-false}" = true ]]; then - "postgresql_execute_print_output" "$@" - elif [[ "${NO_ERRORS:-false}" = true ]]; then - "postgresql_execute_print_output" "$@" 2>/dev/null - else - "postgresql_execute_print_output" "$@" >/dev/null 2>&1 - fi -} - -######################## -# Execute an arbitrary query/queries against a remote PostgreSQL service and print to stdout -# Stdin: -# Query/queries to execute -# Globals: -# BITNAMI_DEBUG -# DB_* -# Arguments: -# $1 - Remote PostgreSQL service hostname -# $2 - Remote PostgreSQL service port -# $3 - Database where to run the queries -# $4 - User to run queries -# $5 - Password -# $6 - Extra options (eg. -tA) -# Returns: -# None -postgresql_remote_execute_print_output() { - local -r hostname="${1:?hostname is required}" - local -r port="${2:?port is required}" - local -a args=("-h" "$hostname" "-p" "$port") - shift 2 - "postgresql_execute_print_output" "$@" "${args[@]}" -} - -######################## -# Execute an arbitrary query/queries against a remote PostgreSQL service -# Stdin: -# Query/queries to execute -# Globals: -# BITNAMI_DEBUG -# DB_* -# Arguments: -# $1 - Remote PostgreSQL service hostname -# $2 - Remote PostgreSQL service port -# $3 - Database where to run the queries -# $4 - User to run queries -# $5 - Password -# $6 - Extra options (eg. 
-tA) -# Returns: -# None -postgresql_remote_execute() { - if [[ "${BITNAMI_DEBUG:-false}" = true ]]; then - "postgresql_remote_execute_print_output" "$@" - elif [[ "${NO_ERRORS:-false}" = true ]]; then - "postgresql_remote_execute_print_output" "$@" 2>/dev/null - else - "postgresql_remote_execute_print_output" "$@" >/dev/null 2>&1 - fi -} - -######################## -# Optionally create the given database user -# Flags: -# -p|--password - database password -# --host - database host -# --port - database port -# Arguments: -# $1 - user -# Returns: -# None -######################### -postgresql_ensure_user_exists() { - local -r user="${1:?user is missing}" - local password="" - # For accessing an external database - local db_host="" - local db_port="" - - # Validate arguments - shift 1 - while [ "$#" -gt 0 ]; do - case "$1" in - -p | --password) - shift - password="${1:?missing password}" - ;; - --host) - shift - db_host="${1:?missing database host}" - ;; - --port) - shift - db_port="${1:?missing database port}" - ;; - *) - echo "Invalid command line flag $1" >&2 - return 1 - ;; - esac - shift - done - - local -a postgresql_execute_cmd=("postgresql_execute") - [[ -n "$db_host" && -n "$db_port" ]] && postgresql_execute_cmd=("postgresql_remote_execute" "$db_host" "$db_port") - local -a postgresql_execute_flags=("postgres" "$(get_env_var_value POSTGRES_USER)" "$(get_env_var_value POSTGRES_PASSWORD)") - - "${postgresql_execute_cmd[@]}" "${postgresql_execute_flags[@]}" <&2 - return 1 - ;; - esac - shift - done - - local -a postgresql_execute_cmd=("postgresql_execute") - [[ -n "$db_host" && -n "$db_port" ]] && postgresql_execute_cmd=("postgresql_remote_execute" "$db_host" "$db_port") - local -a postgresql_execute_flags=("postgres" "$(get_env_var_value POSTGRES_USER)" "$(get_env_var_value POSTGRES_PASSWORD)") - - "${postgresql_execute_cmd[@]}" "${postgresql_execute_flags[@]}" < In addition to the previous environment variables, all the parameters from the configuration file 
can be overwritten by using environment variables with this format: `AIRFLOW__{SECTION}__{KEY}`. Note the double underscores. @@ -456,6 +461,11 @@ docker run -d --name airflow -p 8080:8080 \ ## Notable Changes +### Starting October 30, 2024 + +* The Airflow container now supports running as a Web server, Scheduler or Worker component, so it's no longer necessary to combine this container image with `bitnami/airflow-scheduler` and `bitnami/airflow-worker` in order to use the `CeleryExecutor`. +* The `AIRFLOW_COMPONENT_TYPE` environment variable was introduced to specify the component type. Currently supported values are `webserver`, `scheduler` and `worker`, although support for the `dag-processor` and `triggerer` components is planned to be added soon. The default value is `webserver`. + ### 1.10.15-debian-10-r17 and 2.0.1-debian-10-r50 * The size of the container image has been decreased.