diff --git a/.github/container/Dockerfile.base b/.github/container/Dockerfile.base
index 7e16eeb7c..b7cd44b26 100644
--- a/.github/container/Dockerfile.base
+++ b/.github/container/Dockerfile.base
@@ -37,6 +37,14 @@ RUN install-cudnn.sh
 ADD install-ofed.sh /usr/local/bin
 RUN install-ofed.sh
 
+##############################################################################
+## Amazon EFA support (need to run it inside container separately)
+##############################################################################
+
+ADD install-efa.sh /usr/local/bin
+ENV LD_LIBRARY_PATH=/opt/amazon/efa/lib:${LD_LIBRARY_PATH}
+ENV PATH=/opt/amazon/efa/bin:${PATH}
+
 ###############################################################################
 ## Emergency fix: nsys not in PATH
 ###############################################################################
diff --git a/.github/container/install-efa.sh b/.github/container/install-efa.sh
new file mode 100755
index 000000000..14b744b8a
--- /dev/null
+++ b/.github/container/install-efa.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Install Amazon EFA from the aws-efa-installer tarball.
+#
+# Environment:
+#   EFA_INSTALLER_VERSION  installer version to download, default "latest"
+
+set -euxo pipefail
+
+# Installer version; override from the environment to pin a release
+EFA_INSTALLER_VERSION="${EFA_INSTALLER_VERSION:-latest}"
+EFA_TARBALL="aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz"
+AMAZON_EFA_LINK="https://efa-installer.amazonaws.com/${EFA_TARBALL}"
+
+# Scratch directory for the download; the trap removes it on every exit path
+WORKDIR=$(mktemp -d)
+trap 'rm -rf -- "${WORKDIR}"' EXIT
+
+# Update distro
+apt-get update
+
+# Install required packages
+apt-get install -y curl
+
+# Purge previously installed packages to avoid conflicts
+# while installing the Amazon EFA versions
+dpkg --purge efa-config efa-profile libfabric openmpi \
+  ibacm ibverbs-providers ibverbs-utils infiniband-diags \
+  libibmad-dev libibmad5 libibnetdisc-dev libibnetdisc5 \
+  libibumad-dev libibumad3 libibverbs-dev libibverbs1 librdmacm-dev \
+  librdmacm1 rdma-core rdmacm-utils
+
+# Download Amazon EFA package and install
+pushd "${WORKDIR}"
+
+# --fail makes curl exit non-zero on an HTTP error instead of saving the
+# error page under the tarball name
+curl --fail --location -O "${AMAZON_EFA_LINK}"
+# Separate commands so that set -e also catches a failed extraction
+tar -xf "${EFA_TARBALL}"
+cd aws-efa-installer
+./efa_installer.sh -y -g -d --skip-kmod --skip-limit-conf --no-verify
+
+popd
+
+# Check that the installation is successful
+/opt/amazon/efa/bin/fi_info --version
+
+# Clean up apt caches; WORKDIR is removed by the EXIT trap
+apt-get clean
+rm -rf /var/lib/apt/lists/*