Skip to content

Commit

Permalink
Add dagster and mlflow docker images (#3)
Browse files Browse the repository at this point in the history
* feat: Add dagster Dockerfile. Adjust project structure

* ci: Add dagster image build step

* feat: Add mlflow Dockerfile. Adjust jupyter and dagster Dockerfile

* ci: Add mlflow Docker image build step

* ci: fix invalid value `contest`

* ci: Make ids unique

* feat: Add `torch` and `torchvision` to jupyter image

* feat: Add seaborn as dependency
  • Loading branch information
aiakide authored Nov 5, 2023
1 parent 1d974db commit 9b593f6
Show file tree
Hide file tree
Showing 10 changed files with 1,645 additions and 1,045 deletions.
33 changes: 29 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,42 @@ jobs:
restore-keys: |
${{ runner.os }}-buildx-
- name: Build and push Docker image
id: docker_build_amd64
- name: Build and push jupyter Docker image
id: docker_build_amd64_jupyter
uses: docker/build-push-action@v2.4.0
with:
context: .
tags: codecentric/from-jupyter-to-production-baseimage:latest
file: ./docker/jupyter/Dockerfile
tags: codecentric/from-jupyter-to-production-jupyter:latest
push: true
builder: ${{ steps.buildx.outputs.name }}
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
platforms: linux/amd64,linux/arm64

# Builds ./docker/dagster/Dockerfile with `.` as build context for linux/amd64
# and linux/arm64 and pushes the result as
# codecentric/from-jupyter-to-production-dagster:latest.
# The builder comes from the step with id `buildx`.
# NOTE(review): cache-from/cache-to use the same /tmp/.buildx-cache directory
# as the other build steps in this job - confirm the steps do not overwrite
# each other's cache export.
- name: Build and push dagster Docker image
id: docker_build_amd64_dagster
uses: docker/build-push-action@v2.4.0
with:
context: .
file: ./docker/dagster/Dockerfile
tags: codecentric/from-jupyter-to-production-dagster:latest
push: true
builder: ${{ steps.buildx.outputs.name }}
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
platforms: linux/amd64,linux/arm64
# Builds ./docker/mlflow/Dockerfile with `.` as build context for linux/amd64
# and linux/arm64 and pushes the result as
# codecentric/from-jupyter-to-production-mlflow:latest.
# The builder comes from the step with id `buildx`.
# NOTE(review): cache-from/cache-to use the same /tmp/.buildx-cache directory
# as the other build steps in this job - confirm the steps do not overwrite
# each other's cache export.
- name: Build and push mlflow Docker image
id: docker_build_amd64_mlflow
uses: docker/build-push-action@v2.4.0
with:
context: .
file: ./docker/mlflow/Dockerfile
tags: codecentric/from-jupyter-to-production-mlflow:latest
push: true
builder: ${{ steps.buildx.outputs.name }}
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
platforms: linux/amd64,linux/arm64

# Print the digest of each image pushed by the build steps above.
# The expressions must use the `id:` of those steps; the former
# `steps.docker_build` matched none of the visible step ids, so the step
# echoed an empty string.
- name: Image digest
  run: |
    echo "jupyter: ${{ steps.docker_build_amd64_jupyter.outputs.digest }}"
    echo "dagster: ${{ steps.docker_build_amd64_dagster.outputs.digest }}"
    echo "mlflow: ${{ steps.docker_build_amd64_mlflow.outputs.digest }}"
3 changes: 2 additions & 1 deletion configs/jupyter_lab_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,7 @@
# Default: 'jupyter_server.services.contents.largefilemanager.AsyncLargeFileManager'
# c.ServerApp.contents_manager_class = 'jupyter_server.services.contents.largefilemanager.AsyncLargeFileManager'


## DEPRECATED. Use IdentityProvider.cookie_options
# Default: {}
# c.ServerApp.cookie_options = {}
Expand Down Expand Up @@ -896,7 +897,7 @@
# be used to enable and disable the loading of the extensions. The extensions
# will be loaded in alphabetical order.
# Default: {}
# c.ServerApp.jpserver_extensions = {}
c.ServerApp.jpserver_extensions = {"jupytext":True}

## The kernel manager class to use.
# Default: 'jupyter_server.services.kernels.kernelmanager.MappingKernelManager'
Expand Down
3 changes: 3 additions & 0 deletions configs/jupytext.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Jupytext format rules, keyed by directory prefix.
# NOTE(review): presumably pairs the notebooks under dagster/ (ipynb) with
# percent-format Python scripts under dagster/scripts/ - confirm against the
# Jupytext configuration documentation.
[formats]
"dagster/" = "ipynb"
"dagster/scripts/" = "py:percent"
91 changes: 91 additions & 0 deletions docker/dagster/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# syntax=docker/dockerfile:1

# Based on https://gist.githubusercontent.com/usr-ein/c42d98abca3cb4632ab0c2c6aff8c88a/raw/19dcc899f68d0b08c2c137d3fd01715b0c84bac9/Dockerfile

################################
# PYTHON-BASE
# Shared base of every stage: defines the Python, pip and Poetry environment
# variables and the install locations used by the builder and runtime stages.
################################
# No --platform flag: the base image is already resolved for the target
# platform of each build, so `--platform=$TARGETPLATFORM` was redundant
# (and the matching ARG was never used inside the stage).
FROM python:3.11-slim AS python-base

# Python: unbuffered output, and do not create .pyc files.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# pip: skip the version self-check and use a 100 second timeout.
ENV PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100

# Poetry, configured through environment variables:
# https://python-poetry.org/docs/configuration/#using-environment-variables
#   POETRY_VERSION                 version picked up by the installer script
#   POETRY_HOME                    location Poetry itself is installed to
#   POETRY_VIRTUALENVS_IN_PROJECT  create the virtual environment in the
#                                  project's root; it gets named `.venv`
#   POETRY_NO_INTERACTION          do not ask any interactive question
ENV POETRY_VERSION=1.6.1 \
    POETRY_HOME="/opt/poetry" \
    POETRY_VIRTUALENVS_IN_PROJECT=true \
    POETRY_NO_INTERACTION=1

# Paths: this is where the requirement files and the virtual environment live.
ENV PYSETUP_PATH="/opt/pysetup" \
    VENV_PATH="/opt/pysetup/.venv"

# Prepend Poetry and the virtual environment to PATH. This has to be its own
# instruction: variables set within one ENV instruction are not visible to the
# other assignments of that same instruction.
ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH"


################################
# BUILDER-BASE
# Installs Poetry and the project dependencies into the virtual environment
# under $PYSETUP_PATH. The production stage copies only that directory.
################################
FROM python-base AS builder-base

# Make a RUN fail when any command of a pipeline fails; without this a failed
# download in `curl ... | python3 -` would go unnoticed.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# build-essential: building Python dependencies
# curl:            downloading the Poetry installer
# The apt lists are removed in the same layer that created them.
RUN apt-get update \
    && apt-get install --no-install-recommends -y \
        build-essential \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Install Poetry - respects $POETRY_VERSION & $POETRY_HOME.
# The cache mount covers the directory where Poetry and pip keep their caches
# so that later builds can re-use it. `-f` makes curl fail on HTTP errors
# instead of piping an error page into python3.
RUN --mount=type=cache,target=/root/.cache \
    curl -fsSL https://install.python-poetry.org | python3 -

# Copy only the dependency manifests so that this layer and the install below
# stay cached until they change. Source paths are relative to the build
# context, so no `../../` prefix is needed.
WORKDIR $PYSETUP_PATH
COPY poetry.lock pyproject.toml ./

# Install the dependencies - uses $POETRY_VIRTUALENVS_IN_PROJECT internally.
# Includes the dagster and dagster-webserver groups, leaves out mlflow.
RUN --mount=type=cache,target=/root/.cache \
    poetry install --with dagster-webserver,dagster --without mlflow


################################
# PRODUCTION
# Final runtime image: the virtual environment built in builder-base plus the
# Dagster workspace and instance configuration.
################################
FROM python-base AS production
COPY --from=builder-base $PYSETUP_PATH $PYSETUP_PATH

ENV DAGSTER_HOME=/opt/dagster/dagster_home/

# COPY and WORKDIR create missing destination directories themselves, so no
# separate `RUN mkdir -p` layer is needed.
# workspace.yaml goes to the working directory, dagster.yaml to $DAGSTER_HOME.
COPY ./docker/dagster/workspace.yaml /opt/dagster/app/
COPY ./docker/dagster/dagster.yaml $DAGSTER_HOME/

WORKDIR /opt/dagster/app
EXPOSE 3000
ENV SHELL="/bin/bash"

# Start `dagster dev`, listening on all interfaces on the exposed port 3000.
ENTRYPOINT ["dagster","dev","-h","0.0.0.0","-p","3000"]
2 changes: 2 additions & 0 deletions docker/dagster/dagster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Dagster instance configuration; docker/dagster/Dockerfile copies this file
# into $DAGSTER_HOME.
# Turns telemetry off.
telemetry:
enabled: False
4 changes: 4 additions & 0 deletions docker/dagster/workspace.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# workspace.yaml
# Loads a single Python file as the Dagster code location.
# NOTE(review): docker/dagster/Dockerfile does not create or populate
# /opt/dagster/code - presumably it is mounted at runtime; confirm.

load_from:
- python_file: /opt/dagster/code/dagster-demo.py
86 changes: 86 additions & 0 deletions docker/jupyter/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# syntax=docker/dockerfile:1

# Based on https://gist.githubusercontent.com/usr-ein/c42d98abca3cb4632ab0c2c6aff8c88a/raw/19dcc899f68d0b08c2c137d3fd01715b0c84bac9/Dockerfile

################################
# PYTHON-BASE
# Shared base of every stage: defines the Python, pip and Poetry environment
# variables and the install locations used by the builder and runtime stages.
################################
# No --platform flag: the base image is already resolved for the target
# platform of each build, so `--platform=$TARGETPLATFORM` was redundant
# (and the matching ARG was never used inside the stage).
FROM python:3.11-slim AS python-base

# Python: unbuffered output, and do not create .pyc files.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

# pip: skip the version self-check and use a 100 second timeout.
ENV PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100

# Poetry, configured through environment variables:
# https://python-poetry.org/docs/configuration/#using-environment-variables
#   POETRY_VERSION                 version picked up by the installer script
#   POETRY_HOME                    location Poetry itself is installed to
#   POETRY_VIRTUALENVS_IN_PROJECT  create the virtual environment in the
#                                  project's root; it gets named `.venv`
#   POETRY_NO_INTERACTION          do not ask any interactive question
ENV POETRY_VERSION=1.6.1 \
    POETRY_HOME="/opt/poetry" \
    POETRY_VIRTUALENVS_IN_PROJECT=true \
    POETRY_NO_INTERACTION=1

# Paths: this is where the requirement files and the virtual environment live.
ENV PYSETUP_PATH="/opt/pysetup" \
    VENV_PATH="/opt/pysetup/.venv"

# Prepend Poetry and the virtual environment to PATH. This has to be its own
# instruction: variables set within one ENV instruction are not visible to the
# other assignments of that same instruction.
ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH"


################################
# BUILDER-BASE
# Installs Poetry and the project dependencies into the virtual environment
# under $PYSETUP_PATH. The production stage copies only that directory.
################################
FROM python-base AS builder-base

# Make a RUN fail when any command of a pipeline fails; without this a failed
# download in `curl ... | python3 -` would go unnoticed.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# build-essential: building Python dependencies
# curl:            downloading the Poetry installer
# git:             available while the dependencies are installed
# The apt lists are removed in the same layer that created them.
RUN apt-get update \
    && apt-get install --no-install-recommends -y \
        build-essential \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# Install Poetry - respects $POETRY_VERSION & $POETRY_HOME.
# The cache mount covers the directory where Poetry and pip keep their caches
# so that later builds can re-use it. `-f` makes curl fail on HTTP errors
# instead of piping an error page into python3.
RUN --mount=type=cache,target=/root/.cache \
    curl -fsSL https://install.python-poetry.org | python3 -

# Copy only the dependency manifests so that this layer and the install below
# stay cached until they change. Source paths are relative to the build
# context, so no `../../` prefix is needed.
WORKDIR $PYSETUP_PATH
COPY poetry.lock pyproject.toml ./

# Install the dependencies - uses $POETRY_VIRTUALENVS_IN_PROJECT internally.
# Includes the dagster and mlflow groups, leaves out dagster-webserver.
RUN --mount=type=cache,target=/root/.cache \
    poetry install --with dagster,mlflow --without dagster-webserver

################################
# PRODUCTION
# Final runtime image: the virtual environment built in builder-base plus the
# JupyterLab and Jupytext configuration files.
################################
FROM python-base AS production
COPY --from=builder-base $PYSETUP_PATH $PYSETUP_PATH

# Plain local files, so COPY rather than ADD. Source paths are relative to the
# build context, so no `../../` prefix is needed.
COPY configs/jupyter_lab_config.py /root/.jupyter/jupyter_lab_config.py
COPY configs/jupytext.toml /root/.config/jupytext.toml

WORKDIR /workshop
EXPOSE 8888 4141
ENV SHELL="/bin/bash"
# NOTE(review): presumably silences GitPython's check for a git executable,
# since git is only installed in builder-base - confirm.
ENV GIT_PYTHON_REFRESH="quiet"

# Start JupyterLab; its settings come from the config file copied above.
ENTRYPOINT ["jupyter", "lab"]
20 changes: 12 additions & 8 deletions Dockerfile → docker/mlflow/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,23 @@ RUN apt-get update \
# deps for installing poetry
curl \
# deps for building python deps
build-essential
build-essential \
gcc \
git

# install poetry - respects $POETRY_VERSION & $POETRY_HOME
# The --mount will mount the buildx cache directory to where
# The --mount will mount the buildx cache directory to where
# Poetry and Pip store their cache so that they can re-use it
RUN --mount=type=cache,target=/root/.cache \
curl -sSL https://install.python-poetry.org | python3 -

# copy project requirement files here to ensure they will be cached.
WORKDIR $PYSETUP_PATH
COPY poetry.lock pyproject.toml ./
COPY ../../poetry.lock pyproject.toml ./

# install runtime deps - uses $POETRY_VIRTUALENVS_IN_PROJECT internally
RUN --mount=type=cache,target=/root/.cache \
poetry install
poetry install --only mlflow


################################
Expand All @@ -74,9 +76,11 @@ RUN --mount=type=cache,target=/root/.cache \
FROM python-base as production
COPY --from=builder-base $PYSETUP_PATH $PYSETUP_PATH

ADD configs/jupyter_lab_config.py /root/.jupyter/jupyter_lab_config.py
WORKDIR /workshop
EXPOSE 8888 4141 5001

WORKDIR /ml_data
EXPOSE 5001
ENV SHELL="/bin/bash"
ENV GIT_PYTHON_REFRESH="quiet"


ENTRYPOINT ["jupyter", "lab"]
ENTRYPOINT ["mlflow","server","--host","0.0.0.0","--port","5001"]
Loading

0 comments on commit 9b593f6

Please sign in to comment.