From 49ce56f849b34bbff800f7218f4e65d8fee7bf6a Mon Sep 17 00:00:00 2001 From: Jannis R Date: Mon, 9 Oct 2023 17:15:41 +0300 Subject: [PATCH 01/11] =?UTF-8?q?readme:=20remove=20jq=20prerequisite=20?= =?UTF-8?q?=F0=9F=93=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit follow-up of 5fd6d5 --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 88926ca..d480525 100644 --- a/README.md +++ b/README.md @@ -8,4 +8,3 @@ First, we have to install Docker (Desktop); Follow the [official installation in You will also need the following tools: - [`make`](https://en.wikipedia.org/wiki/Make_(software)) -- [`jq`](https://jqlang.github.io/jq/) From baf79fb8360c7e05ee7b3ff00a77b3d54e3854d1 Mon Sep 17 00:00:00 2001 From: Jannis R Date: Fri, 13 Oct 2023 01:02:54 +0300 Subject: [PATCH 02/11] =?UTF-8?q?.env.local.example:=20fill=20$DAGSTER=5FP?= =?UTF-8?q?OSTGRES=5FPASSWORD=20=F0=9F=92=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CI directly uses .env.local.example as .env.local, so it fails with an empty value. --- .env.local.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env.local.example b/.env.local.example index a485e59..50e73f1 100644 --- a/.env.local.example +++ b/.env.local.example @@ -23,7 +23,7 @@ VOI_PASSWORD= # A random password which will be used for the postgresql database which is used by Dagster. Should be long and should not contain special # signs in order to prevent escaping issues (eg: 32 chars alphanumeric). 
-DAGSTER_POSTGRES_PASSWORD= +DAGSTER_POSTGRES_PASSWORD=boapaLEspBg6qhY8 # A random password for geoserver admin access GEOSERVER_ADMIN_PASSWORD=FNn71ymEeBJ13VHd From 3aa655a33e31fd4666e8198f127ad9c74a220bfe Mon Sep 17 00:00:00 2001 From: Jannis R Date: Thu, 12 Oct 2023 23:36:18 +0300 Subject: [PATCH 03/11] add pgbouncer service --- .env | 5 +++++ .env.local.example | 3 +++ docker-compose.yml | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/.env b/.env index faa0671..292f07a 100644 --- a/.env +++ b/.env @@ -46,6 +46,11 @@ IPL_GTFS_DB_POSTGRES_PASSWORD=password IPL_GTFS_DB_POSTGRES_DB=gtfs_importer IPL_GTFS_DB_POSTGRES_DB_PREFIX=gtfs +# pgbouncer variables +PGBOUNCER_IMAGE=bitnami/pgbouncer:1 +PGBOUNCER_POSTGRES_PORT=6432 +PGBOUNCER_POSTGRES_USER=postgres + # gtfs-api variables IPL_GTFS_API_PORT=4000 # todo diff --git a/.env.local.example b/.env.local.example index 50e73f1..1a5a554 100644 --- a/.env.local.example +++ b/.env.local.example @@ -21,6 +21,9 @@ DEER_PASSWORD= VOI_USER= VOI_PASSWORD= +# A random password for pgbouncer clients +PGBOUNCER_POSTGRES_PASSWORD=XP1EdA6S0BHmybkC + # A random password which will be used for the postgresql database which is used by Dagster. Should be long and should not contain special # signs in order to prevent escaping issues (eg: 32 chars alphanumeric). 
DAGSTER_POSTGRES_PASSWORD=boapaLEspBg6qhY8 diff --git a/docker-compose.yml b/docker-compose.yml index 7a83822..1281845 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -347,6 +347,38 @@ services: start_period: 10s retries: 10 + # pgbouncer acts like a facade in front of PostgreSQL instances + pgbouncer: + networks: [ipl] + image: ${PGBOUNCER_IMAGE} + ports: + - ${PGBOUNCER_POSTGRES_PORT:?missing/empty $PGBOUNCER_POSTGRES_PORT}:6432 + environment: + # Even if we define all upstream database connections "manually" using $PGBOUNCER_DSN_* (see below), the bitnami/pgbouncer Docker image expects 1 connection to be specified via $POSTGRESQL_*, which it implicitly adds to the `[database]` section of pgbouncer.ini. Note that the specified database credentials must be valid. + # However, the $POSTGRESQL_* env vars are *also* used to configure client access to pgbouncer: They get implicitly added to the generated userlist.txt, meaning that clients *must* use them (if there are no other user/password pairs defined, which we don't do) to connect to *any* exposed connection. + # Therefore, we duplicate the connection to ipl-db and expose it as `meta`, clearly denoting it as a connection *not* intended for pgbouncer clients. + PGBOUNCER_DATABASE: meta + POSTGRESQL_HOST: ipl-db + POSTGRESQL_DATABASE: ${IPL_POSTGRES_DB:?missing/empty $IPL_POSTGRES_DB} + POSTGRESQL_USERNAME: ${PGBOUNCER_POSTGRES_USER:?missing/empty $PGBOUNCER_POSTGRES_USER} + POSTGRESQL_PASSWORD: ${PGBOUNCER_POSTGRES_PASSWORD:?missing/empty $PGBOUNCER_POSTGRES_PASSWORD} + # > Most polite method. When a client connects, a server connection will be assigned to it for the whole duration the client stays connected. When the client disconnects, the server connection will be put back into the pool. This is the default method. + # We hardcode this, because we definitely don't want the other pooling modes. + PGBOUNCER_POOL_MODE: session + # todo: remove? 
should work without 🤔 + # see also https://github.com/bitnami/containers/issues/48636#issuecomment-1722518107 + PGBOUNCER_AUTH_TYPE: md5 + + # upsteam connections + # todo + restart: unless-stopped + healthcheck: + # pgbouncer exposes a `pgbouncer` "meta database", providing an interface for statistics and to administer the instance. We use it here to check if pgbouncer is working properly. + test: 'env PGPASSWORD="$$POSTGRESQL_PASSWORD" psql -p 6432 -U "$$POSTGRESQL_USERNAME" pgbouncer -b -c "SHOW USERS" >/dev/null' + interval: 0m15s + timeout: 5s + retries: 10 + gtfs-api: networks: [ipl] depends_on: From 10334751b80f66bc9580edf004695969768d11f7 Mon Sep 17 00:00:00 2001 From: Jannis R Date: Thu, 12 Oct 2023 23:48:44 +0300 Subject: [PATCH 04/11] let geoserver connect to pgbouncer instead of {ipl,gtfs}-db --- .env.local.example | 2 +- Makefile | 6 +-- docker-compose.yml | 39 +++++++++++++------ .../MobiData-BW/gtfs-db/datastore.xml | 18 +++------ .../MobiData-BW/ipl-db/datastore.xml | 11 +++--- 5 files changed, 43 insertions(+), 33 deletions(-) diff --git a/.env.local.example b/.env.local.example index 1a5a554..59a23ce 100644 --- a/.env.local.example +++ b/.env.local.example @@ -21,7 +21,7 @@ DEER_PASSWORD= VOI_USER= VOI_PASSWORD= -# A random password for pgbouncer clients +# A random password for pgbouncer clients (geoserver, etc.) PGBOUNCER_POSTGRES_PASSWORD=XP1EdA6S0BHmybkC # A random password which will be used for the postgresql database which is used by Dagster. Should be long and should not contain special diff --git a/Makefile b/Makefile index d8db2d2..cfd38eb 100644 --- a/Makefile +++ b/Makefile @@ -68,9 +68,9 @@ import-new-gtfs: init $(DOCKER_COMPOSE) --profile import-new-gtfs run --rm gtfs-importer # restart dependent services # Restarting the containers would re-run them with the old env vars, so we `stop` & `start` instead. 
- $(DOCKER_COMPOSE) stop --timeout 30 gtfs-api geoserver - $(DOCKER_COMPOSE) rm gtfs-api geoserver - $(DOCKER_COMPOSE) up -d --wait --wait-timeout 30 gtfs-api geoserver + $(DOCKER_COMPOSE) stop --timeout 30 gtfs-api + $(DOCKER_COMPOSE) rm gtfs-api + $(DOCKER_COMPOSE) up -d --wait --wait-timeout 30 gtfs-api .PHONY: gtfs-db-psql gtfs-db-psql: diff --git a/docker-compose.yml b/docker-compose.yml index 1281845..2170d88 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -224,17 +224,12 @@ services: # kartoza/geoserver maps PROXY_BASE_URL_PARAMETRIZATION to ALLOW_ENV_PARAMETRIZATION, see https://github.com/kartoza/docker-geoserver/blob/844c7a26acd1687358c821ea73117a721f25f7b6/scripts/entrypoint.sh#L72 - PROXY_BASE_URL_PARAMETRIZATION=true # The following parameters are *not* picked up by kartoza/geoserver, but instead passed through as "regular" env vars, and then read by Geoserver whenever one of the config files references them. - - IPL_POSTGRES_PASSWORD=${IPL_POSTGRES_PASSWORD} - - IPL_GTFS_DB_POSTGRES_PASSWORD=${IPL_GTFS_DB_POSTGRES_PASSWORD} - # contains the latest import's DB name as `PGDATABASE` - env_file: ./.imported-gtfs-db.env + - PGBOUNCER_POSTGRES_USER=${PGBOUNCER_POSTGRES_USER:?missing/empty $PGBOUNCER_POSTGRES_USER} + - PGBOUNCER_POSTGRES_PASSWORD=${PGBOUNCER_POSTGRES_PASSWORD:?missing/empty $PGBOUNCER_POSTGRES_PASSWORD} depends_on: - ipl-db: - # For sharing layers - condition: service_healthy - gtfs-db: - # For transit layer - condition: service_healthy + pgbouncer: + # For sharing & transit layers + condition: service_healthy healthcheck: test: "curl -fsS -o /dev/null -u '${GEOSERVER_ADMIN_USER}':'${GEOSERVER_ADMIN_PASSWORD}' http://localhost:8080/geoserver/rest/about/version.xml" interval: 0m15s @@ -347,9 +342,19 @@ services: start_period: 10s retries: 10 - # pgbouncer acts like a facade in front of PostgreSQL instances + # pgbouncer acts like a facade in front of PostgreSQL instances (ipl-db, gtfs-db), providing + # - programmatically 
configurable query routing (used by `make import-new-gtfs`) + # - improved connect performance for clients, especially Dagster workers pgbouncer: networks: [ipl] + depends_on: + ipl-db: + condition: service_started + gtfs-db: + condition: service_started + links: + - ipl-db + - gtfs-db image: ${PGBOUNCER_IMAGE} ports: - ${PGBOUNCER_POSTGRES_PORT:?missing/empty $PGBOUNCER_POSTGRES_PORT}:6432 @@ -368,9 +373,19 @@ services: # todo: remove? should work without 🤔 # see also https://github.com/bitnami/containers/issues/48636#issuecomment-1722518107 PGBOUNCER_AUTH_TYPE: md5 + # Geoserver fails to connect without this config + # see https://github.com/bitnami/containers/issues/25394#issuecomment-1457893192 + PGBOUNCER_IGNORE_STARTUP_PARAMETERS: extra_float_digits # upsteam connections - # todo + # see also https://www.pgbouncer.org/config.html#section-databases + # see also https://www.postgresql.org/docs/15/libpq-connect.html#id-1.7.3.8.3.5 + # expose ipl-db as `ipl` + PGBOUNCER_DSN_0: "ipl=host=ipl-db dbname=${IPL_POSTGRES_DB:?missing/empty $IPL_POSTGRES_DB} user=${IPL_POSTGRES_USER:?missing/empty $IPL_POSTGRES_USER} password=${IPL_POSTGRES_PASSWORD:?missing/empty $IPL_POSTGRES_PASSWORD}" + # todo: expose dagster-postgresql as `dagster` + # expose the latest GTFS import within gtfs-db as `gtfs` + # todo: let gtfs-importer modify this config programmatically! + PGBOUNCER_DSN_2: "gtfs=host=gtfs-db dbname=${IPL_GTFS_DB_POSTGRES_DB:?missing/empty $IPL_GTFS_DB_POSTGRES_DB} user=${IPL_GTFS_DB_POSTGRES_USER:?missing/empty $IPL_GTFS_DB_POSTGRES_USER} password=${IPL_GTFS_DB_POSTGRES_PASSWORD:?missing/empty $IPL_GTFS_DB_POSTGRES_PASSWORD}" restart: unless-stopped healthcheck: # pgbouncer exposes a `pgbouncer` "meta database", providing an interface for statistics and to administer the instance. We use it here to check if pgbouncer is working properly. 
diff --git a/etc/geoserver/workspaces/MobiData-BW/gtfs-db/datastore.xml b/etc/geoserver/workspaces/MobiData-BW/gtfs-db/datastore.xml index a5ca9e1..6f28b4e 100644 --- a/etc/geoserver/workspaces/MobiData-BW/gtfs-db/datastore.xml +++ b/etc/geoserver/workspaces/MobiData-BW/gtfs-db/datastore.xml @@ -14,15 +14,9 @@ true 1 false - - ${PGDATABASE} - gtfs-db + + gtfs + pgbouncer true DISABLE true @@ -33,15 +27,15 @@ 20 false FAST - 5432 - ${IPL_GTFS_DB_POSTGRES_PASSWORD} + 6432 + ${PGBOUNCER_POSTGRES_PASSWORD} 1 postgis mdbw 10 3 true - postgres + ${PGBOUNCER_POSTGRES_USER} 300 <__default>false diff --git a/etc/geoserver/workspaces/MobiData-BW/ipl-db/datastore.xml b/etc/geoserver/workspaces/MobiData-BW/ipl-db/datastore.xml index 727a5aa..5732e97 100644 --- a/etc/geoserver/workspaces/MobiData-BW/ipl-db/datastore.xml +++ b/etc/geoserver/workspaces/MobiData-BW/ipl-db/datastore.xml @@ -14,8 +14,9 @@ true 1 false - geoserver - ipl-db + + ipl + pgbouncer true DISABLE true @@ -26,15 +27,15 @@ 20 false FAST - 5432 - ${IPL_POSTGRES_PASSWORD} + 6432 + ${PGBOUNCER_POSTGRES_PASSWORD} 1 postgis mdbw 10 3 true - geoserver + ${PGBOUNCER_POSTGRES_USER} 300 <__default>false From 0c1f2ba559a48b49761a669092bed634270d6e3c Mon Sep 17 00:00:00 2001 From: Jannis R Date: Thu, 12 Oct 2023 23:51:04 +0300 Subject: [PATCH 05/11] let gtfs-api connect to pgbouncer instead of gtfs-db --- .env.local.example | 2 +- Makefile | 5 ----- docker-compose.yml | 15 ++++++++------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/.env.local.example b/.env.local.example index 59a23ce..c83b53d 100644 --- a/.env.local.example +++ b/.env.local.example @@ -21,7 +21,7 @@ DEER_PASSWORD= VOI_USER= VOI_PASSWORD= -# A random password for pgbouncer clients (geoserver, etc.) +# A random password for pgbouncer clients (gtfs-api, geoserver, etc.) PGBOUNCER_POSTGRES_PASSWORD=XP1EdA6S0BHmybkC # A random password which will be used for the postgresql database which is used by Dagster. 
Should be long and should not contain special diff --git a/Makefile b/Makefile index cfd38eb..777ec1b 100644 --- a/Makefile +++ b/Makefile @@ -66,11 +66,6 @@ docker-ps: import-new-gtfs: init $(DOCKER_COMPOSE) build gtfs-importer $(DOCKER_COMPOSE) --profile import-new-gtfs run --rm gtfs-importer - # restart dependent services - # Restarting the containers would re-run them with the old env vars, so we `stop` & `start` instead. - $(DOCKER_COMPOSE) stop --timeout 30 gtfs-api - $(DOCKER_COMPOSE) rm gtfs-api - $(DOCKER_COMPOSE) up -d --wait --wait-timeout 30 gtfs-api .PHONY: gtfs-db-psql gtfs-db-psql: diff --git a/docker-compose.yml b/docker-compose.yml index 2170d88..413f9ab 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -397,20 +397,21 @@ services: gtfs-api: networks: [ipl] depends_on: - gtfs-db: + pgbouncer: condition: service_healthy links: - - gtfs-db + - pgbouncer image: postgrest/postgrest ports: - ${IPL_GTFS_API_PORT}:3000 read_only: true - # contains the latest import's DB name as `PGDATABASE` - env_file: ./.imported-gtfs-db.env environment: - PGHOST: gtfs-db - PGUSER: ${IPL_GTFS_DB_POSTGRES_USER} - PGPASSWORD: ${IPL_GTFS_DB_POSTGRES_PASSWORD} + # connect via pgbouncer to be independent of the GTFS DBs' suffixes & to improve performance; within the Docker network pgbouncer listens on its container port 6432 ($PGBOUNCER_POSTGRES_PORT is only the port published on the host) + PGHOST: pgbouncer + PGPORT: '6432' + PGUSER: ${PGBOUNCER_POSTGRES_USER:?missing/empty $PGBOUNCER_POSTGRES_USER} + PGPASSWORD: ${PGBOUNCER_POSTGRES_PASSWORD:?missing/empty $PGBOUNCER_POSTGRES_PASSWORD} + PGDATABASE: gtfs # determined by pgbouncer!
# PostgREST-specific env vars PGRST_OPENAPI_SERVER_PROXY_URI: ${IPL_GTFS_API_PUBLIC_BASE_URL} PGRST_DB_SCHEMAS: api From 6c2c37e724d89222902c31f78b103095c0fbdebc Mon Sep 17 00:00:00 2001 From: Jannis R Date: Thu, 12 Oct 2023 23:55:49 +0300 Subject: [PATCH 06/11] make import-new-gtfs: reconfigure pgbouncer's connection to gtfs-db --- .gitignore | 2 -- Makefile | 4 +++- docker-compose.yml | 16 +++++++++++----- etc/reload-pgbouncer-databases.sh | 26 ++++++++++++++++++++++++++ gtfs-importer/importer.js | 17 ++++++++++++----- 5 files changed, 52 insertions(+), 13 deletions(-) create mode 100755 etc/reload-pgbouncer-databases.sh diff --git a/.gitignore b/.gitignore index 8e0da49..215c143 100644 --- a/.gitignore +++ b/.gitignore @@ -9,5 +9,3 @@ Thumbs.db /var /.env.local - -/.imported-gtfs-db.env diff --git a/Makefile b/Makefile index 777ec1b..1c005f5 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,8 @@ all: docker-up .PHONY: init init: - touch -a .imported-gtfs-db.env + mkdir -p var/gtfs + touch -a var/gtfs/pgbouncer-dsn.txt # Container management # -------------------- @@ -66,6 +67,7 @@ docker-ps: import-new-gtfs: init $(DOCKER_COMPOSE) build gtfs-importer $(DOCKER_COMPOSE) --profile import-new-gtfs run --rm gtfs-importer + $(DOCKER_COMPOSE) --profile import-new-gtfs exec pgbouncer /reload-pgbouncer-databases.sh .PHONY: gtfs-db-psql gtfs-db-psql: diff --git a/docker-compose.yml b/docker-compose.yml index 413f9ab..3c8da33 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -355,9 +355,14 @@ services: links: - ipl-db - gtfs-db + - dagster-postgresql image: ${PGBOUNCER_IMAGE} ports: - ${PGBOUNCER_POSTGRES_PORT:?missing/empty $PGBOUNCER_POSTGRES_PORT}:6432 + volumes: + # contains the latest import's DB name + - ./var/gtfs/pgbouncer-dsn.txt:/var/gtfs-pgbouncer-dsn.txt + - ./etc/reload-pgbouncer-databases.sh:/reload-pgbouncer-databases.sh environment: # Even if we define all upstream database connections "manually" using $PGBOUNCER_DSN_* (see below), the
bitnami/pgbouncer Docker image expects 1 connection to be specified via $POSTGRESQL_*, which it implicitly adds to the `[database]` section of pgbouncer.ini. Note that the specified database credentials must be valid. # However, the $POSTGRESQL_* env vars are *also* used to configure client access to pgbouncer: They get implicitly added to the generated userlist.txt, meaning that clients *must* use them (if there are no other user/password pairs defined, which we don't do) to connect to *any* exposed connection. @@ -384,8 +389,10 @@ services: PGBOUNCER_DSN_0: "ipl=host=ipl-db dbname=${IPL_POSTGRES_DB:?missing/empty $IPL_POSTGRES_DB} user=${IPL_POSTGRES_USER:?missing/empty $IPL_POSTGRES_USER} password=${IPL_POSTGRES_PASSWORD:?missing/empty $IPL_POSTGRES_PASSWORD}" # todo: expose dagster-postgresql as `dagster` # expose the latest GTFS import within gtfs-db as `gtfs` - # todo: let gtfs-importer modify this config programmatically! - PGBOUNCER_DSN_2: "gtfs=host=gtfs-db dbname=${IPL_GTFS_DB_POSTGRES_DB:?missing/empty $IPL_GTFS_DB_POSTGRES_DB} user=${IPL_GTFS_DB_POSTGRES_USER:?missing/empty $IPL_GTFS_DB_POSTGRES_USER} password=${IPL_GTFS_DB_POSTGRES_PASSWORD:?missing/empty $IPL_GTFS_DB_POSTGRES_PASSWORD}" + # $PGBOUNCER_DSN_2 gets generated from /var/gtfs-pgbouncer-dsn.txt (which is written by gtfs-importer) by the Docker image's entrypoint script. + # see https://github.com/bitnami/containers/issues/46152#issuecomment-1695320501 + # todo: not yet, push & PR our changes, see `image` field above! + PGBOUNCER_DSN_2_FILE: /var/gtfs-pgbouncer-dsn.txt restart: unless-stopped healthcheck: # pgbouncer exposes a `pgbouncer` "meta database", providing an interface for statistics and to administer the instance. We use it here to check if pgbouncer is working properly. 
@@ -441,7 +448,6 @@ services: volumes: - ./var/gtfs:/var/gtfs - ./etc/gtfs:/etc/gtfs - - ./.imported-gtfs-db.env:/var/.imported-gtfs-db.env environment: PGHOST: gtfs-db PGUSER: ${IPL_GTFS_DB_POSTGRES_USER} @@ -451,8 +457,8 @@ services: GTFS_DOWNLOAD_URL: ${GTFS_DOWNLOAD_URL:-} # the prefix of all DBs created for imports by gtfs-importer GTFS_IMPORTER_DB_PREFIX: ${IPL_GTFS_DB_POSTGRES_DB_PREFIX} - # path to the env file containing the latest import's DB name as `PGDATABASE` - GTFS_IMPORTER_ENV_FILE: /var/.imported-gtfs-db.env + # path to the file containing the latest import's DB name + GTFS_IMPORTER_DSN_FILE: /var/gtfs/pgbouncer-dsn.txt GTFS_TMP_DIR: /var/gtfs gtfs-swagger-ui: diff --git a/etc/reload-pgbouncer-databases.sh b/etc/reload-pgbouncer-databases.sh new file mode 100755 index 0000000..428cdd8 --- /dev/null +++ b/etc/reload-pgbouncer-databases.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +set -eu -o pipefail + +export PGUSER="$POSTGRESQL_USERNAME" +export PGPASSWORD="$POSTGRESQL_PASSWORD" + +set -x + +# this script gets called at pgbouncer's startup as well +/opt/bitnami/scripts/pgbouncer/setup.sh + +# https://www.pgbouncer.org/usage.html +# > SUSPEND +# > All socket buffers are flushed and PgBouncer stops listening for data on them. The command will not return before all buffers are empty. To be used at the time of PgBouncer online reboot. +# > New client connections to a suspended database will wait until RESUME is called. +# > RELOAD +# > The PgBouncer process will reload its configuration files and update changeable settings. This includes the main configuration file as well as the files specified by the settings auth_file and auth_hba_file. +# > PgBouncer notices when a configuration file reload changes the connection parameters of a database definition. 
An existing server connection to the old destination will be closed when the server connection is next released (according to the pooling mode), and new server connections will immediately use the updated connection parameters. +# > RESUME [db] +# > Resume work from previous KILL, PAUSE, or SUSPEND command. +psql -b -p 6432 pgbouncer < { return new Promise((resolve, reject) => { @@ -122,10 +122,17 @@ try { ON CONFLICT (always_true) DO UPDATE SET db_name = $1; `, [dbName]) - if (PATH_TO_ENV_FILE !== null) { - console.info(`writing PGDATABASE="${dbName}" into env file ${PATH_TO_ENV_FILE}`) - await writeFile(PATH_TO_ENV_FILE, `PGDATABASE="${dbName}"\n`) - console.info('make sure to also put it into .env.local, so that `geoserver` can read it') + if (PATH_TO_DSN_FILE !== null) { + // https://www.pgbouncer.org/config.html#section-databases + // https://www.postgresql.org/docs/15/libpq-connect.html#id-1.7.3.8.3.5 + const { + PGHOST, + PGUSER, + PGPASSWORD, + } = process.env + const dsn = `gtfs=host=${PGHOST} dbname=${dbName} user=${PGUSER} password=${PGPASSWORD}` + console.info(`writing pgbouncer DSN for database "${dbName}" into DSN file ${PATH_TO_DSN_FILE}`) + await writeFile(PATH_TO_DSN_FILE, dsn) } console.info('import succeeded, committing all changes to "latest_import"!') From 354eb5a5c921184399faf1644442114f353dacf3 Mon Sep 17 00:00:00 2001 From: Jannis R Date: Thu, 9 Nov 2023 15:40:04 +0100 Subject: [PATCH 07/11] =?UTF-8?q?make=20import-new-gtfs:=20ensure=20pgboun?= =?UTF-8?q?cer=20is=20running=20=F0=9F=90=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 1c005f5..ceed904 100644 --- a/Makefile +++ b/Makefile @@ -67,6 +67,8 @@ docker-ps: import-new-gtfs: init $(DOCKER_COMPOSE) build gtfs-importer $(DOCKER_COMPOSE) --profile import-new-gtfs run --rm gtfs-importer + # make sure pgbouncer is running + $(DOCKER_COMPOSE) --profile import-new-gtfs up --detach --wait
--wait-timeout 20 pgbouncer $(DOCKER_COMPOSE) --profile import-new-gtfs exec pgbouncer /reload-pgbouncer-databases.sh .PHONY: gtfs-db-psql From 7097be52cd479984ded7a0cd0d6d4ed8d70e31f9 Mon Sep 17 00:00:00 2001 From: Jannis R Date: Thu, 12 Oct 2023 23:59:36 +0300 Subject: [PATCH 08/11] let dagster-* connect to pgbouncer instead of dagster-postgresql --- .env.local.example | 2 +- docker-compose.yml | 45 ++++++++++++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/.env.local.example b/.env.local.example index c83b53d..868156b 100644 --- a/.env.local.example +++ b/.env.local.example @@ -21,7 +21,7 @@ DEER_PASSWORD= VOI_USER= VOI_PASSWORD= -# A random password for pgbouncer clients (gtfs-api, geoserver, etc.) +# A random password for pgbouncer clients (gtfs-api, geoserver, dagster, etc.) PGBOUNCER_POSTGRES_PASSWORD=XP1EdA6S0BHmybkC # A random password which will be used for the postgresql database which is used by Dagster. Should be long and should not contain special diff --git a/docker-compose.yml b/docker-compose.yml index 3c8da33..827f37b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -263,10 +263,12 @@ services: image: ${DAGSTER_PIPELINE_IMAGE} restart: always environment: - - PGHOST=dagster-postgresql - - PGUSER=${DAGSTER_POSTGRES_USER} - - PGPASSWORD=${DAGSTER_POSTGRES_PASSWORD} - - PGDATABASE=${DAGSTER_POSTGRES_DB} + # connect via pgbouncer (on its container port 6432, not the host-published $PGBOUNCER_POSTGRES_PORT) to improve performance + - PGHOST=pgbouncer + - PGPORT=6432 + - PGUSER=${PGBOUNCER_POSTGRES_USER:?missing/empty $PGBOUNCER_POSTGRES_USER} + - PGPASSWORD=${PGBOUNCER_POSTGRES_PASSWORD:?missing/empty $PGBOUNCER_POSTGRES_PASSWORD} + - PGDATABASE=dagster # determined by pgbouncer!
# Use docker hostname of lamassu service to avoid roundtrip via proxy service - IPL_LAMASSU_BASE_URL=http://lamassu/ - IPL_POSTGRES_HOST=ipl-db @@ -275,7 +277,7 @@ services: - IPL_POSTGRES_USER - IPL_POSTGRES_PASSWORD depends_on: - dagster-postgresql: + pgbouncer: condition: service_healthy # This service runs dagster-webserver, which loads your user code from the user code container. @@ -289,16 +291,19 @@ services: ports: - "3000:3000" environment: - PGHOST: dagster-postgresql - PGUSER: ${DAGSTER_POSTGRES_USER} - PGPASSWORD: ${DAGSTER_POSTGRES_PASSWORD} - PGDATABASE: ${DAGSTER_POSTGRES_DB} + # connect via pgbouncer (on its container port 6432, not the host-published $PGBOUNCER_POSTGRES_PORT) to improve performance + PGHOST: pgbouncer + # todo: doesn't work yet, uses default port 5432 instead + PGPORT: '6432' + PGUSER: ${PGBOUNCER_POSTGRES_USER:?missing/empty $PGBOUNCER_POSTGRES_USER} + PGPASSWORD: ${PGBOUNCER_POSTGRES_PASSWORD:?missing/empty $PGBOUNCER_POSTGRES_PASSWORD} + PGDATABASE: dagster # determined by pgbouncer!
volumes: # Make docker client accessible so we can terminate containers from dagster-webserver - /var/run/docker.sock:/var/run/docker.sock - /tmp/io_manager_storage:/tmp/io_manager_storage - ./etc/dagster:/opt/dagster/dagster_home/ depends_on: - dagster-postgresql: + pgbouncer: condition: service_healthy dagster-pipeline: condition: service_started @@ -310,16 +315,19 @@ services: image: ${DAGSTER_DAEMON_IMAGE} restart: on-failure environment: - PGHOST: dagster-postgresql - PGUSER: ${DAGSTER_POSTGRES_USER} - PGPASSWORD: ${DAGSTER_POSTGRES_PASSWORD} - PGDATABASE: ${DAGSTER_POSTGRES_DB} + # connect via pgbouncer (on its container port 6432, not the host-published $PGBOUNCER_POSTGRES_PORT) to improve performance + PGHOST: pgbouncer + # todo: doesn't work yet, uses default port 5432 instead + PGPORT: '6432' + PGUSER: ${PGBOUNCER_POSTGRES_USER:?missing/empty $PGBOUNCER_POSTGRES_USER} + PGPASSWORD: ${PGBOUNCER_POSTGRES_PASSWORD:?missing/empty $PGBOUNCER_POSTGRES_PASSWORD} + PGDATABASE: dagster # determined by pgbouncer!
volumes: # Make docker client accessible so we can launch containers using host docker - /var/run/docker.sock:/var/run/docker.sock - /tmp/io_manager_storage:/tmp/io_manager_storage - ./etc/dagster:/opt/dagster/dagster_home/ depends_on: - dagster-postgresql: + pgbouncer: condition: service_healthy dagster-pipeline: condition: service_started @@ -342,7 +350,7 @@ services: start_period: 10s retries: 10 - # pgbouncer acts like a facade in front of PostgreSQL instances (ipl-db, gtfs-db), providing + # pgbouncer acts like a facade in front of PostgreSQL instances (ipl-db, gtfs-db, dagster-postgresql), providing # - programmatically configurable query routing (used by `make import-new-gtfs`) # - improved connect performance for clients, especially Dagster workers pgbouncer: @@ -352,6 +360,8 @@ services: condition: service_started gtfs-db: condition: service_started + dagster-postgresql: + condition: service_started links: - ipl-db - gtfs-db @@ -387,7 +397,8 @@ services: # see also https://www.postgresql.org/docs/15/libpq-connect.html#id-1.7.3.8.3.5 # expose ipl-db as `ipl` PGBOUNCER_DSN_0: "ipl=host=ipl-db dbname=${IPL_POSTGRES_DB:?missing/empty $IPL_POSTGRES_DB} user=${IPL_POSTGRES_USER:?missing/empty $IPL_POSTGRES_USER} password=${IPL_POSTGRES_PASSWORD:?missing/empty $IPL_POSTGRES_PASSWORD}" - # todo: expose dagster-postgresql as `dagster` + # expose dagster-postgresql as `dagster` + PGBOUNCER_DSN_1: "dagster=host=dagster-postgresql dbname=${DAGSTER_POSTGRES_DB:?missing/empty $DAGSTER_POSTGRES_DB} user=${DAGSTER_POSTGRES_USER:?missing/empty $DAGSTER_POSTGRES_USER} password=${DAGSTER_POSTGRES_PASSWORD:?missing/empty $DAGSTER_POSTGRES_PASSWORD}" # expose the latest GTFS import within gtfs-db as `gtfs` # $PGBOUNCER_DSN_2 gets generated from /var/gtfs-pgbouncer-dsn.txt (which is written by gtfs-importer) by the Docker image's entrypoint script. 
# see https://github.com/bitnami/containers/issues/46152#issuecomment-1695320501 From ce089c9a05bf3fc6b2421b006e492e614fc41e7c Mon Sep 17 00:00:00 2001 From: Jannis R Date: Wed, 8 Nov 2023 16:30:35 +0100 Subject: [PATCH 09/11] gtfs-importer: download GTFS with caching --- gtfs-importer/Dockerfile | 13 +++++++++++++ gtfs-importer/import.sh | 25 +++++++++---------------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/gtfs-importer/Dockerfile b/gtfs-importer/Dockerfile index 6636f7f..a743698 100644 --- a/gtfs-importer/Dockerfile +++ b/gtfs-importer/Dockerfile @@ -14,6 +14,7 @@ ARG TARGETVARIANT # curl is needed to download the GTFS # moreutils is needed for sponge # postgresql-client is needed for psql +# note: curl-mirror.mjs would need gunzip *if* the HTTP response was gzipped RUN apt update && apt install -y \ bash \ curl \ @@ -23,6 +24,18 @@ RUN apt update && apt install -y \ zstd \ && rm -rf /var/lib/apt/lists/* +# > Alas, there is no way to tell node to interpret a file with an arbitrary extension as an ESM module. That’s why we have to use the extension .mjs. Workarounds are possible but complicated, as we’ll see later. +# https://exploringjs.com/nodejs-shell-scripting/ch_creating-shell-scripts.html#node.js-esm-modules-as-standalone-shell-scripts-on-unix +# > A script such as homedir.mjs does not need to be executable on Unix because npm installs it via an executable symbolic link […]. 
+# https://exploringjs.com/nodejs-shell-scripting/ch_creating-shell-scripts.html#how-npm-installs-shell-scripts +ADD \ + --checksum=sha256:95b995d6e30cb765a02c14f265526801664ea9e03a090951cab0aee7fed103ee \ + https://gist.github.com/derhuerst/745cf09fe5f3ea2569948dd215bbfe1a/raw/6df4a02302d77edac674fec52ed1c0b32a795a37/mirror.mjs \ + /opt/curl-mirror.mjs +RUN \ + ln -s /opt/curl-mirror.mjs /usr/local/bin/curl-mirror && \ + chmod +x /usr/local/bin/curl-mirror + RUN \ curl -fsSL \ -H 'User-Agent: gtfs-importer (github.com/mobidata-bw/ipl-orchestration)' \ diff --git a/gtfs-importer/import.sh b/gtfs-importer/import.sh index d84af99..1c1439b 100755 --- a/gtfs-importer/import.sh +++ b/gtfs-importer/import.sh @@ -24,22 +24,15 @@ tidied_path="$gtfs_tmp_dir/tidied.gtfs" print_bold "Downloading & extracting the GTFS feed from $GTFS_DOWNLOAD_URL." set -x -# Using wget with both -c *and* -N is not an option here, so we use curl. -# see also https://gist.github.com/derhuerst/745cf09fe5f3ea2569948dd215bbfe1a -# Note: This *does not* work with an incomplete local download! -# todo: use a (more?) correct & efficient mirroring script -# wget -nv -U "$ua" -O "$zip_path" "$gtfs_url" -# flags logic modified from https://superuser.com/q/1710172 -curl_flags=() -if test -e "$zip_path"; then - curl_flags+=(-z "$zip_path") -fi -curl -fsSL \ - -H "User-Agent: $ua" \ - --compressed -R \ - -o "$zip_path" \ - "${curl_flags[@]}" \ - "$gtfs_url" +# custom curl-based HTTP mirroring/download script +# > curl-mirror [--tmp-prefix …] [--log-level …] [--debug-curl] [-- curl-opts...] 
+# see https://gist.github.com/derhuerst/745cf09fe5f3ea2569948dd215bbfe1a +curl-mirror \ + --tmp-prefix "$zip_path.mirror-" \ + "$gtfs_url" "$zip_path" \ + -- \ + -H "User-Agent: $ua" + rm -rf "$extracted_path" unzip -d "$extracted_path" "$zip_path" From 72d8f86156bb7e2285358d88ce76d0b5d9006782 Mon Sep 17 00:00:00 2001 From: Jannis R Date: Thu, 9 Nov 2023 15:40:44 +0100 Subject: [PATCH 10/11] ipl-db: tweak healthcheck parameters With the default parameters, it would sometimes not become healthy fast enough. --- docker-compose.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 827f37b..73d8e54 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -196,6 +196,9 @@ services: restart: on-failure healthcheck: test: "PGPASSWORD=${IPL_POSTGRES_PASSWORD} pg_isready -h 127.0.0.1 -U ${IPL_POSTGRES_USER} -d ${IPL_POSTGRES_DB}" + interval: 5s + timeout: 3s + retries: 12 geoserver: networks: [ipl] From 959c494456940a95bfcbce8e9a968b5d2abc0280 Mon Sep 17 00:00:00 2001 From: Jannis R Date: Thu, 9 Nov 2023 15:42:32 +0100 Subject: [PATCH 11/11] put todos; gtfs-importer: print gtfs-via-postgres version --- docker-compose.yml | 2 ++ gtfs-importer/import.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 73d8e54..4efc2d5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -415,6 +415,7 @@ services: timeout: 5s retries: 10 + # todo /arrivals_departures: enforce stop_id filtering? gtfs-api: networks: [ipl] depends_on: @@ -485,6 +486,7 @@ services: - ${IPL_GTFS_SWAGGER_UI_PORT}:8080 environment: # Swagger UI will call gtfs-api in the browser, so we need to use its "outside" port here. 
+ # todo: localhost -> dev-ipl.mobidata-bw.de API_URL: http://localhost:${IPL_GTFS_API_PORT} BASE_URL: /docs/gtfs labels: diff --git a/gtfs-importer/import.sh b/gtfs-importer/import.sh index 1c1439b..b7a96a1 100755 --- a/gtfs-importer/import.sh +++ b/gtfs-importer/import.sh @@ -61,6 +61,8 @@ set +x print_bold "Importing (tidied) GTFS feed into the $PGDATABASE database." set -x +gtfs-to-sql --version + gtfs-to-sql -d \ --trips-without-shape-id --lower-case-lang-codes \ --stops-location-index \