Skip to content

Commit

Permalink
Fixed makefile for spark environment
Browse files Browse the repository at this point in the history
Signed-off-by: Maroun Touma <touma@us.ibm.com>
  • Loading branch information
touma-I committed Oct 23, 2024
1 parent 61fec0b commit 7d01b99
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 50 deletions.
36 changes: 19 additions & 17 deletions .make.defaults
Original file line number Diff line number Diff line change
Expand Up @@ -297,8 +297,8 @@ __check_defined = \
# available in the current directory for use by the Dockerfile (i.e. to install the library).
# Note that this looks for the ../python directory, which is currently only used in the transform projects,
# but we add it here as a convenience to avoid duplicating a lot of this in transforms/.make.transforms.
.PHONY: .defaults.ray-lib-wheel-image
.defaults.ray-lib-whl-image:: .default.build-lib-wheel
.PHONY: .defaults.lib-whl-image
.defaults.lib-whl-image:: .default.build-lib-wheel
# Must be called with a DOCKER_LOCAL_IMAGE= setting.
@# Help: Build the $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE) and library wheel
@$(eval LIB_WHEEL_FILE := $(shell find data-processing-dist/*.whl))
Expand All @@ -319,21 +319,23 @@ __check_defined = \
# Note that this looks for the ../python directory, which is currently only used in the transform projects,
# but we add it here as a convenience to avoid duplicating a lot of this in transforms/.make.transforms.
# Must be called with a DOCKER_LOCAL_IMAGE= setting.
.PHONY: .defaults.spark-lib-src-image
.defaults.spark-lib-src-image:: .defaults.spark-lib-base-image
@# Help: Build the Spark $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-libs source
$(MAKE) IMAGE_NAME_TO_VERIFY=$(DOCKER_SPARK_BASE_IMAGE_NAME) .defaults.verify-image-availability
ifeq ($(USE_REPO_LIB_SRC), 1)
$(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
$(MAKE) LIB_PATH=$(DPK_SPARK_LIB_DIR) LIB_NAME=data-processing-lib-spark .defaults.copy-lib
endif
if [ -e ../python ]; then \
$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
fi
$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) .defaults.image
-rm -rf data-processing-lib-python
-rm -rf data-processing-lib-spark
-rm -rf python-transform
#.PHONY: .defaults.spark-lib-src-image
#.defaults.spark-lib-src-image:: .defaults.spark-lib-base-image
# @# Help: Build the Spark $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-libs source
# $(MAKE) IMAGE_NAME_TO_VERIFY=$(DOCKER_SPARK_BASE_IMAGE_NAME) .defaults.verify-image-availability
#ifeq ($(USE_REPO_LIB_SRC), 1)
# $(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
# $(MAKE) LIB_PATH=$(DPK_SPARK_LIB_DIR) LIB_NAME=data-processing-lib-spark .defaults.copy-lib
#endif
# if [ -e ../python ]; then \
# $(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
# fi
# $(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) .defaults.image
# -rm -rf data-processing-lib-python
# -rm -rf data-processing-lib-spark
# -rm -rf python-transform



# Install the source from the given directory into an existing venv
# Expected PYTHON_PROJECT_DIR and uses EXTRA_INDEX_URL if set.
Expand Down
2 changes: 1 addition & 1 deletion tools/ingest2parquet/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ test:: venv test-src test-image

clean:: .defaults.clean

image:: .defaults.ray-lib-whl-image
image:: .defaults.lib-whl-image

test-src:: .defaults.test-src test-local

Expand Down
4 changes: 2 additions & 2 deletions transforms/.make.transforms
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,10 @@ extra-help:
.transforms.python-image:: .defaults.python-lib-whl-image

.PHONY: .transforms.ray-image
.transforms.ray-image:: .defaults.ray-lib-whl-image
.transforms.ray-image:: .defaults.lib-whl-image

.PHONY: .transforms.spark-image
.transforms.spark-image:: .defaults.spark-lib-src-image
.transforms.spark-image:: .defaults.lib-whl-image

.PHONY: .transforms.python-build
.transforms.python-build:: .transforms.python-venv .transforms.python-image
Expand Down
11 changes: 5 additions & 6 deletions transforms/universal/doc_id/spark/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@ USER root
RUN pip install --no-cache-dir pytest

WORKDIR ${SPARK_HOME}/work-dir
ARG WHEEL_FILE_NAME

# Copy in the data processing framework source/project and install it
# This is expected to be placed in the docker context before this is run (see the make image).
# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:root data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[spark]

COPY --chown=spark:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=spark:root data-processing-lib-spark/ data-processing-lib-spark/
RUN cd data-processing-lib-spark && pip install --no-cache-dir -e .

# Install project source
COPY --chown=spark:root src/ src/
Expand Down
12 changes: 6 additions & 6 deletions transforms/universal/filter/spark/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ USER root
RUN pip install --no-cache-dir pytest

WORKDIR ${SPARK_HOME}/work-dir
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:root data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[spark]

# Copy in the data processing framework source/project and install it
# This is expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=spark:root data-processing-lib-spark/ data-processing-lib-spark/
RUN cd data-processing-lib-spark && pip install --no-cache-dir -e .
COPY --chown=spark:root python-transform/ python-transform/
RUN cd python-transform && pip install --no-cache-dir -e .

Expand Down
12 changes: 6 additions & 6 deletions transforms/universal/noop/spark/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ USER root
RUN pip install --no-cache-dir pytest

WORKDIR ${SPARK_HOME}/work-dir
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:root data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[spark]

# Copy in the data processing framework source/project and install it
# This is expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=spark:root data-processing-lib-spark/ data-processing-lib-spark/
RUN cd data-processing-lib-spark && pip install --no-cache-dir -e .
COPY --chown=spark:root python-transform/ python-transform/
RUN cd python-transform && pip install --no-cache-dir -e .

Expand Down
12 changes: 6 additions & 6 deletions transforms/universal/profiler/spark/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ USER root
RUN pip install --no-cache-dir pytest

WORKDIR ${SPARK_HOME}/work-dir
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:root data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[spark]

# Copy in the data processing framework source/project and install it
# This is expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=spark:root data-processing-lib-spark/ data-processing-lib-spark/
RUN cd data-processing-lib-spark && pip install --no-cache-dir -e .
COPY --chown=spark:root python-transform/ python-transform/
RUN cd python-transform && pip install --no-cache-dir -e .

Expand Down
12 changes: 6 additions & 6 deletions transforms/universal/resize/spark/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ USER root
RUN pip install --no-cache-dir pytest

WORKDIR ${SPARK_HOME}/work-dir
ARG WHEEL_FILE_NAME

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:root data-processing-dist data-processing-dist
RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[spark]

# Copy in the data processing framework source/project and install it
# This is expected to be placed in the docker context before this is run (see the make image).
COPY --chown=spark:root data-processing-lib-python/ data-processing-lib-python/
RUN cd data-processing-lib-python && pip install --no-cache-dir -e .
COPY --chown=spark:root data-processing-lib-spark/ data-processing-lib-spark/
RUN cd data-processing-lib-spark && pip install --no-cache-dir -e .
COPY --chown=spark:root python-transform/ python-transform/
RUN cd python-transform && pip install --no-cache-dir -e .

Expand Down

0 comments on commit 7d01b99

Please sign in to comment.