-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Metadata rds #201
Metadata rds #201
Changes from all commits
9461ad8
9f2f43c
66946de
0e416d5
01f3dbb
dbe07bb
61323f8
e587177
6f2f519
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,32 @@ | ||
# helper to know if env is already loaded | ||
BOOTSTRAPPED=1 | ||
|
||
# database | ||
DB_HOST=local_rds | ||
DB_PORT=5432 | ||
DB_NAME=performance_manager | ||
DB_USER=postgres | ||
DB_PASSWORD=postgres | ||
ALEMBIC_DB_NAME=performance_manager_prod | ||
# metadata database | ||
MD_DB_HOST=local_md_rds | ||
MD_DB_PORT=5433 | ||
MD_DB_NAME=metadata | ||
MD_DB_USER=postgres | ||
MD_DB_PASSWORD=postgres | ||
ALEMBIC_MD_DB_NAME=metadata_prod | ||
|
||
# performance manager database | ||
RPM_DB_HOST=local_rpm_rds | ||
RPM_DB_PORT=5434 | ||
RPM_DB_NAME=performance_manager | ||
RPM_DB_USER=postgres | ||
RPM_DB_PASSWORD=postgres | ||
ALEMBIC_RPM_DB_NAME=performance_manager_prod | ||
|
||
# s3 locations | ||
SPRINGBOARD_BUCKET=mbta-ctd-dataplatform-dev-springboard | ||
ARCHIVE_BUCKET=mbta-ctd-dataplatform-dev-archive | ||
ERROR_BUCKET=mbta-ctd-dataplatform-dev-error | ||
INCOMING_BUCKET=mbta-ctd-dataplatform-dev-incoming | ||
|
||
# mbta-performance with personal access | ||
PUBLIC_ARCHIVE_BUCKET=mbta-ctd-dataplatform-dev-archive | ||
|
||
# Tableau | ||
TABLEAU_USER=DOUPDATE | ||
TABLEAU_PASSWORD=DOUPDATE | ||
TABLEAU_SERVER=http://awtabDEV02.mbta.com | ||
TABLEAU_SERVER=http://awtabDEV02.mbta.com |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The local Docker Compose setup now runs two databases. |
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -2,16 +2,28 @@ version: '3' | |||||||
|
||||||||
services: | ||||||||
|
||||||||
local_rds: | ||||||||
container_name: local_rds | ||||||||
rail_pm_rds: | ||||||||
container_name: ${RPM_DB_HOST} | ||||||||
image: postgres:14.4 | ||||||||
env_file: .env | ||||||||
shm_size: '2gb' | ||||||||
environment: | ||||||||
POSTGRES_DB: ${DB_NAME} | ||||||||
POSTGRES_PASSWORD: ${DB_PASSWORD} | ||||||||
POSTGRES_DB: ${RPM_DB_NAME} | ||||||||
POSTGRES_PASSWORD: ${RPM_DB_PASSWORD} | ||||||||
ports: | ||||||||
- "5432:5432" | ||||||||
- "${RPM_DB_PORT}:5432" | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These alternate ports cause issues with local development, additional logic is required in |
||||||||
command: ["postgres", "-c", "log_statement=all"] | ||||||||
|
||||||||
metadata_rds: | ||||||||
container_name: ${MD_DB_HOST} | ||||||||
image: postgres:15 | ||||||||
env_file: .env | ||||||||
shm_size: '2gb' | ||||||||
environment: | ||||||||
POSTGRES_DB: ${MD_DB_NAME} | ||||||||
POSTGRES_PASSWORD: ${MD_DB_PASSWORD} | ||||||||
ports: | ||||||||
- "${MD_DB_PORT}:5432" | ||||||||
command: ["postgres", "-c", "log_statement=all"] | ||||||||
|
||||||||
performance_manager: | ||||||||
|
@@ -20,7 +32,8 @@ services: | |||||||
build: | ||||||||
context: ./python_src | ||||||||
depends_on: | ||||||||
- local_rds | ||||||||
- rail_pm_rds | ||||||||
- metadata_rds | ||||||||
working_dir: /lamp | ||||||||
volumes: | ||||||||
- ~/.aws:/root/.aws:ro # map credentials to be used by boto3, read-only | ||||||||
|
@@ -32,7 +45,8 @@ services: | |||||||
build: | ||||||||
context: ./python_src | ||||||||
depends_on: | ||||||||
- local_rds | ||||||||
- rail_pm_rds | ||||||||
- metadata_rds | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
working_dir: /lamp | ||||||||
volumes: | ||||||||
# map credentials to be used by boto3, read-only | ||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,6 +78,16 @@ sqlalchemy.url = driver://user:pass@localhost/dbname | |
script_location = src/lamp_py/migrations | ||
version_locations = src/lamp_py/migrations/versions/performance_manager_prod | ||
|
||
[metadata_staging] | ||
sqlalchemy.url = driver://user:pass@localhost/dbname | ||
script_location = src/lamp_py/migrations | ||
version_locations = src/lamp_py/migrations/versions/metadata_staging | ||
|
||
[metadata_prod] | ||
sqlalchemy.url = driver://user:pass@localhost/dbname | ||
script_location = src/lamp_py/migrations | ||
version_locations = src/lamp_py/migrations/versions/metadata_prod | ||
|
||
Comment on lines
+86
to
+90
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add metadata db migrations. |
||
[post_write_hooks] | ||
# post_write_hooks defines scripts or Python functions that are run | ||
# on newly generated revision scripts. See the documentation for further | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,6 @@ authors = [ | |
ingestion = 'lamp_py.ingestion.pipeline:start' | ||
performance_manager = 'lamp_py.performance_manager.pipeline:start' | ||
seed_metadata = 'lamp_py.postgres.seed_metadata:run' | ||
snapshot = 'lamp_py.postgres.snapshot:run' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't use this, so I removed it. |
||
hyper_update = 'lamp_py.tableau.pipeline:start_hyper_updates' | ||
|
||
[tool.poetry.dependencies] | ||
|
@@ -80,6 +79,6 @@ max-line-length = 80 | |
min-similarity-lines = 10 | ||
# ignore session maker as it gives pylint fits | ||
# https://github.com/PyCQA/pylint/issues/7090 | ||
ignored-classes = ['sqlalchemy.orm.session.sessionmaker','pyarrow.compute'] | ||
ignored-classes = ['sqlalchemy.orm.session.sessionmaker', 'pyarrow.compute'] | ||
Comment on lines
-83
to
+82
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TOML linting: add a space after the comma. |
||
# ignore the migrations directory. it's going to have duplication and _that is ok_. | ||
ignore-paths = ["^src/lamp_py/migrations/.*$"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
"""initial changes | ||
|
||
Revision ID: 07903947aabe | ||
Revises: | ||
Create Date: 2023-12-11 15:12:47.261091 | ||
|
||
""" | ||
from alembic import op | ||
from sqlalchemy.exc import ProgrammingError | ||
import logging | ||
import sqlalchemy as sa | ||
|
||
from lamp_py.postgres.postgres_utils import DatabaseIndex, DatabaseManager | ||
from lamp_py.postgres.metadata_schema import MetadataLog | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = "07903947aabe" | ||
down_revision = None | ||
branch_labels = None | ||
depends_on = None | ||
|
||
|
||
def upgrade() -> None:
    """
    Create the metadata_log table and backfill it from the rail performance
    manager database.

    Steps:
      1. create the "metadata_log" table and its partial index on "path"
         (restricted to rows where rail_pm_processed is false).
      2. read (path, processed, process_fail) rows from the metadata_log
         table of the rail performance manager database, if that table
         exists.
      3. bulk insert any rows found into the new table, mapping processed ->
         rail_pm_processed and process_fail -> rail_pm_process_fail.

    Raises:
        ProgrammingError: for any SQL programming error other than Postgres
            error 42P01 (undefined table) while reading the source table.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "metadata_log",
        sa.Column("pk_id", sa.Integer(), nullable=False),
        sa.Column("rail_pm_processed", sa.Boolean(), nullable=True),
        sa.Column("rail_pm_process_fail", sa.Boolean(), nullable=True),
        sa.Column("path", sa.String(length=256), nullable=False),
        sa.Column(
            "created_on",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=True,
        ),
        sa.PrimaryKeyConstraint("pk_id"),
        sa.UniqueConstraint("path"),
    )
    op.create_index(
        "ix_metadata_log_not_processed",
        "metadata_log",
        ["path"],
        unique=False,
        postgresql_where=sa.text("rail_pm_processed = false"),
    )

    # pull metadata from the rail performance manager database into the
    # metadata database. the table may or may not exist, so wrap this in a try
    # except. start from an empty list so there is always something to check
    # after the try block.
    insert_data = []
    try:
        rpm_db_manager = DatabaseManager(
            db_index=DatabaseIndex.RAIL_PERFORMANCE_MANAGER
        )

        # pull metadata from the rail performance manager database via direct
        # sql query. raw strings must be wrapped in sa.text() to be executable
        # (plain strings are deprecated in SQLAlchemy 1.4 and rejected in 2.0).
        with rpm_db_manager.session.begin() as session:
            result = session.execute(
                sa.text(
                    "SELECT path, processed, process_fail FROM metadata_log"
                )
            )
            insert_data = [
                {
                    "path": path,
                    "rail_pm_processed": processed,
                    "rail_pm_process_fail": process_fail,
                }
                for (path, processed, process_fail) in result
            ]

    except ProgrammingError as error:
        # Error 42P01 is an 'Undefined Table' error. This occurs when there is
        # no metadata_log table in the rail performance manager database
        #
        # Raise all other sql errors. getattr guards against driver errors
        # that do not carry a pgcode, so the original error is re-raised
        # rather than masked by an AttributeError.
        if getattr(error.orig, "pgcode", None) != "42P01":
            raise
        insert_data = []
        logging.info("No Metadata Table in Rail Performance Manager")

    # insert data into the metadata database
    # NOTE(review): this uses the live MetadataLog model; if that model later
    # drifts from the table created above, this insert may need an inline
    # table definition instead - confirm.
    if insert_data:
        op.bulk_insert(MetadataLog.__table__, insert_data)

    # ### end Alembic commands ###
|
||
|
||
def downgrade() -> None:
    """
    Revert this revision: drop the "ix_metadata_log_not_processed" index and
    then the "metadata_log" table it was built on.
    """
    table_name = "metadata_log"
    index_name = "ix_metadata_log_not_processed"

    # ### commands auto generated by Alembic - please adjust! ###
    # the index goes first, then the table it belongs to
    op.drop_index(index_name, table_name=table_name)
    op.drop_table(table_name)
    # ### end Alembic commands ###
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
New environment variables are used to distinguish which database each set of connection parameters belongs to.