-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create and Use Metadata Schema / RDS
The metadata database will be managed by the ingestion application and used to store created springboard files and their process status in the bus and rail performance manager applications. * Create a schema for the database and add migration configuration paths to pull existing data out of the rail performance manager. * Update the rail performance manager schema to drop the metadata table and add a migration that ensures all data has been moved over before the drop. * Expand the `DatabaseManager` class to connect to either of the two dbs and improve the handling of host names and ports so that they work when the local performance manager is running in a docker image and directly on the command line. * Update seed metadata script to the expected behavior. * Update metadata postgres version in yaml files for the metadata rds to match the version deployed on aws. * Improve environment validation to handle multiple databases. * Update tests to use both database managers.
- Loading branch information
1 parent
f2e898c
commit 1d2846d
Showing
19 changed files
with
632 additions
and
272 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
98 changes: 98 additions & 0 deletions
98
python_src/src/lamp_py/migrations/versions/metadata_prod/001_07903947aabe_initial_changes.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
"""initial changes | ||
Revision ID: 07903947aabe | ||
Revises: | ||
Create Date: 2023-12-11 15:12:47.261091 | ||
""" | ||
from alembic import op | ||
from sqlalchemy.exc import ProgrammingError | ||
import logging | ||
import sqlalchemy as sa | ||
|
||
from lamp_py.postgres.postgres_utils import DatabaseIndex, DatabaseManager | ||
from lamp_py.postgres.metadata_schema import MetadataLog | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = "07903947aabe" | ||
down_revision = None | ||
branch_labels = None | ||
depends_on = None | ||
|
||
|
||
def upgrade() -> None:
    """Create the ``metadata_log`` table and seed it with existing records.

    The table and its ``ix_metadata_log_not_processed`` index are created
    first. Afterwards, rows are read from the ``metadata_log`` table of the
    rail performance manager database and bulk inserted into the new table,
    mapping ``processed`` -> ``rail_pm_processed`` and ``process_fail`` ->
    ``rail_pm_process_fail``.

    Raises:
        sqlalchemy.exc.ProgrammingError: if reading from the rail performance
            manager fails for any reason other than the source table being
            undefined (postgres error code 42P01).
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "metadata_log",
        sa.Column("pk_id", sa.Integer(), nullable=False),
        sa.Column("rail_pm_processed", sa.Boolean(), nullable=True),
        sa.Column("rail_pm_process_fail", sa.Boolean(), nullable=True),
        sa.Column("path", sa.String(length=256), nullable=False),
        sa.Column(
            "created_on",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=True,
        ),
        sa.PrimaryKeyConstraint("pk_id"),
        sa.UniqueConstraint("path"),
    )
    op.create_index(
        "ix_metadata_log_not_processed",
        "metadata_log",
        ["path"],
        unique=False,
        postgresql_where=sa.text("rail_pm_processed = false"),
    )

    # pull metadata from the rail performance manager database into the
    # metadata database. the table may or may not exist, so wrap this in a try
    # except
    #
    # initialize before the try so the name is bound on every code path
    insert_data = []
    try:
        rpm_db_manager = DatabaseManager(
            db_index=DatabaseIndex.RAIL_PERFORMANCE_MANAGER
        )

        # pull metadata from the rail performance manager database via direct
        # sql query. the statement is wrapped in sa.text() because SQLAlchemy
        # 2.0 no longer accepts bare strings as executable statements.
        with rpm_db_manager.session.begin() as session:
            result = session.execute(
                sa.text(
                    "SELECT path, processed, process_fail FROM metadata_log"
                )
            )
            insert_data = [
                {
                    "path": path,
                    "rail_pm_processed": processed,
                    "rail_pm_process_fail": process_fail,
                }
                for (path, processed, process_fail) in result
            ]

    except ProgrammingError as error:
        # Error 42P01 is an 'Undefined Table' error. This occurs when there is
        # no metadata_log table in the rail performance manager database
        #
        # Raise all other sql errors
        insert_data = []
        # getattr keeps an unexpected driver exception (one without a pgcode
        # attribute) from masking the original error with an AttributeError
        if getattr(error.orig, "pgcode", None) == "42P01":
            logging.info("No Metadata Table in Rail Performance Manager")
        else:
            raise

    # insert data into the metadata database
    if insert_data:
        op.bulk_insert(MetadataLog.__table__, insert_data)

    # ### end Alembic commands ###
|
||
|
||
def downgrade() -> None:
    """Revert this migration by dropping the index and then the table."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name = "metadata_log"

    # the index is removed before the table it is built on
    op.drop_index("ix_metadata_log_not_processed", table_name=table_name)
    op.drop_table(table_name)
    # ### end Alembic commands ###
98 changes: 98 additions & 0 deletions
98
..._src/src/lamp_py/migrations/versions/metadata_staging/001_07903947aabe_initial_changes.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
"""initial changes | ||
Revision ID: 07903947aabe | ||
Revises: | ||
Create Date: 2023-12-11 15:12:47.261091 | ||
""" | ||
from alembic import op | ||
from sqlalchemy.exc import ProgrammingError | ||
import logging | ||
import sqlalchemy as sa | ||
|
||
from lamp_py.postgres.postgres_utils import DatabaseIndex, DatabaseManager | ||
from lamp_py.postgres.metadata_schema import MetadataLog | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = "07903947aabe" | ||
down_revision = None | ||
branch_labels = None | ||
depends_on = None | ||
|
||
|
||
def upgrade() -> None:
    """Create the ``metadata_log`` table and seed it with existing records.

    The table and its ``ix_metadata_log_not_processed`` index are created
    first. Afterwards, rows are read from the ``metadata_log`` table of the
    rail performance manager database and bulk inserted into the new table,
    mapping ``processed`` -> ``rail_pm_processed`` and ``process_fail`` ->
    ``rail_pm_process_fail``.

    Raises:
        sqlalchemy.exc.ProgrammingError: if reading from the rail performance
            manager fails for any reason other than the source table being
            undefined (postgres error code 42P01).
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "metadata_log",
        sa.Column("pk_id", sa.Integer(), nullable=False),
        sa.Column("rail_pm_processed", sa.Boolean(), nullable=True),
        sa.Column("rail_pm_process_fail", sa.Boolean(), nullable=True),
        sa.Column("path", sa.String(length=256), nullable=False),
        sa.Column(
            "created_on",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=True,
        ),
        sa.PrimaryKeyConstraint("pk_id"),
        sa.UniqueConstraint("path"),
    )
    op.create_index(
        "ix_metadata_log_not_processed",
        "metadata_log",
        ["path"],
        unique=False,
        postgresql_where=sa.text("rail_pm_processed = false"),
    )

    # pull metadata from the rail performance manager database into the
    # metadata database. the table may or may not exist, so wrap this in a try
    # except
    #
    # initialize before the try so the name is bound on every code path
    insert_data = []
    try:
        rpm_db_manager = DatabaseManager(
            db_index=DatabaseIndex.RAIL_PERFORMANCE_MANAGER
        )

        # pull metadata from the rail performance manager database via direct
        # sql query. the statement is wrapped in sa.text() because SQLAlchemy
        # 2.0 no longer accepts bare strings as executable statements.
        with rpm_db_manager.session.begin() as session:
            result = session.execute(
                sa.text(
                    "SELECT path, processed, process_fail FROM metadata_log"
                )
            )
            insert_data = [
                {
                    "path": path,
                    "rail_pm_processed": processed,
                    "rail_pm_process_fail": process_fail,
                }
                for (path, processed, process_fail) in result
            ]

    except ProgrammingError as error:
        # Error 42P01 is an 'Undefined Table' error. This occurs when there is
        # no metadata_log table in the rail performance manager database
        #
        # Raise all other sql errors
        insert_data = []
        # getattr keeps an unexpected driver exception (one without a pgcode
        # attribute) from masking the original error with an AttributeError
        if getattr(error.orig, "pgcode", None) == "42P01":
            logging.info("No Metadata Table in Rail Performance Manager")
        else:
            raise

    # insert data into the metadata database
    if insert_data:
        op.bulk_insert(MetadataLog.__table__, insert_data)

    # ### end Alembic commands ###
|
||
|
||
def downgrade() -> None:
    """Revert this migration by dropping the index and then the table."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name = "metadata_log"

    # the index is removed before the table it is built on
    op.drop_index("ix_metadata_log_not_processed", table_name=table_name)
    op.drop_table(table_name)
    # ### end Alembic commands ###
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.