Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: HyperFile API Publishing and Parquet Flat File Creation #176

Merged
merged 5 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,10 @@ SPRINGBOARD_BUCKET=mbta-ctd-dataplatform-dev-springboard
ARCHIVE_BUCKET=mbta-ctd-dataplatform-dev-archive
ERROR_BUCKET=mbta-ctd-dataplatform-dev-error
INCOMING_BUCKET=mbta-ctd-dataplatform-dev-incoming
# mbta-performance with personal access
PUBLIC_ARCHIVE_BUCKET=mbta-ctd-dataplatform-dev-archive

# Tableau
TABLEAU_USER=DOUPDATE
TABLEAU_PASSWORD=DOUPDATE
Comment on lines +21 to +22
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be updated with personal username/password for dev development

TABLEAU_SERVER=http://awtabDEV02.mbta.com
4 changes: 2 additions & 2 deletions .github/actions/python_deps/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ runs:

- name: Install Python Deps
working-directory: python_src
# env use python3.9 to force usage of python3.9 installed by asdf over system python version
# env use python3.10 to force usage of python3.10 installed by asdf over system python version
run: |
poetry env use python3.9
poetry env use python3.10
poetry install -v
shell: bash
if: "!steps.python-cache.outputs.cache-hit"
2 changes: 1 addition & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ persistent=yes

# Minimum Python version to use for version dependent checks. Will default to
# the version used to run pylint.
py-version=3.9
py-version=3.10

# Discover python modules and packages in the file system subtree.
recursive=no
Expand Down
2 changes: 1 addition & 1 deletion .tool-versions
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
poetry 1.4.2
python 3.9.12
python 3.10.13
elixir 1.14.4-otp-25
erlang 25.3
direnv 2.32.2
2 changes: 1 addition & 1 deletion python_src/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.9-slim
FROM python:3.10-slim

# Keeps Python from generating .pyc files in the container
ENV PYTHONDONTWRITEBYTECODE 1
Expand Down
1,421 changes: 795 additions & 626 deletions python_src/poetry.lock

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions python_src/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,22 @@ ingestion = 'lamp_py.ingestion.pipeline:start'
performance_manager = 'lamp_py.performance_manager.pipeline:start'
seed_metadata = 'lamp_py.postgres.seed_metadata:run'
snapshot = 'lamp_py.postgres.snapshot:run'
hyper_update = 'lamp_py.tableau.pipeline:start_hyper_updates'

[tool.poetry.dependencies]
python = "^3.9"
python = "^3.10"
SQLAlchemy = "^1.4.39"
pyarrow = "^13.0.0"
boto3 = "^1.23.3"
boto3 = "^1.28.62"
pandas = "^1.4.3"
numpy = "^1.23.1"
psycopg2 = "^2.9.3"
psutil = "^5.9.1"
schedule = "^1.1.0"
alembic = "^1.10.2"
types-pytz = "^2023.3.0.1"
tableauhyperapi = "^0.0.17971"
tableauserverclient = "^0.28"

[tool.poetry.dev-dependencies]
black = "^23.1.0"
Expand All @@ -48,7 +51,7 @@ disallow_untyped_defs = true
ignore_missing_imports = true
plugins = ["sqlalchemy.ext.mypy.plugin"]
pretty = true
python_version = 3.9
python_version = "3.10"
warn_unreachable = true
warn_unused_ignores = true

Expand Down
73 changes: 73 additions & 0 deletions python_src/src/lamp_py/aws/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,79 @@ def get_s3_client() -> boto3.client:
return boto3.client("s3")


def upload_file(file_name: str, object_path: str) -> bool:
"""
Upload a local file to an S3 Bucket

:param file_name: local file path to upload
:param object_path: S3 object path to upload to (including bucket)

:return: True if file was uploaded, else False
"""
upload_log = ProcessLogger(
"s3_upload_file",
file_name=file_name,
object_path=object_path,
)
upload_log.log_start()

try:
if not os.path.exists(file_name):
raise FileNotFoundError(f"{file_name} not found locally")

object_path = object_path.replace("s3://", "")
bucket, object_name = object_path.split("/", 1)

s3_client = get_s3_client()

s3_client.upload_file(file_name, bucket, object_name)

upload_log.log_complete()

return True

except Exception as exception:
upload_log.log_failure(exception=exception)
return False


def download_file(object_path: str, file_name: str) -> bool:
"""
Download an S3 object to a local file
will overwrite local file, if exists

:param object_path: S3 object path to download from (including bucket)
:param file_name: local file path to save object to

:return: True if file was downloaded, else False
"""
download_log = ProcessLogger(
"s3_download_file",
file_name=file_name,
object_path=object_path,
)
download_log.log_start()

try:
if os.path.exists(file_name):
os.remove(file_name)

object_path = object_path.replace("s3://", "")
bucket, object_name = object_path.split("/", 1)

s3_client = get_s3_client()

s3_client.download_file(bucket, object_name, file_name)

download_log.log_complete()

return True

except Exception as exception:
download_log.log_failure(exception=exception)
return False


def get_zip_buffer(filename: str) -> IO[bytes]:
"""
Get a buffer for a zip file from s3 so that it can be read by zipfile
Expand Down
2 changes: 2 additions & 0 deletions python_src/src/lamp_py/performance_manager/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from lamp_py.runtime_utils.alembic_migration import alembic_upgrade_to_head
from lamp_py.runtime_utils.env_validation import validate_environment
from lamp_py.runtime_utils.process_logger import ProcessLogger
from lamp_py.tableau.pipeline import start_parquet_updates

from .flat_file import write_flat_files
from .l0_gtfs_rt_events import process_gtfs_rt_files
Expand Down Expand Up @@ -67,6 +68,7 @@ def iteration() -> None:
process_static_tables(db_manager)
process_gtfs_rt_files(db_manager)
write_flat_files(db_manager)
start_parquet_updates()

process_logger.log_complete()
except Exception as exception:
Expand Down
1 change: 1 addition & 0 deletions python_src/src/lamp_py/tableau/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Utilites for Interacting with Tableau and Hyper files"""
Loading
Loading