Use nvidia tensorflow image; check NUMA settings
This commit is contained in:
parent
f335235b28
commit
db11c15897
3 changed files with 73 additions and 25 deletions
24
Makefile
24
Makefile
|
|
@ -7,16 +7,26 @@ up: run
|
|||
run: build
|
||||
docker compose up
|
||||
|
||||
build: jupyter/Dockerfile jupyter/cuda.deb
|
||||
docker compose build
|
||||
|
||||
jupyter/cuda.deb:
|
||||
curl https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-debian11-12-0-local_12.0.0-525.60.13-1_amd64.deb -o cuda.deb
|
||||
build: jupyter/Dockerfile
|
||||
docker compose build --progress plain
|
||||
|
||||
down:
|
||||
docker compose down
|
||||
|
||||
|
||||
.PHONY: default up run down build
|
||||
# precheck: verify that the GPU's PCI device reports NUMA node 0.
#
# All of this assumes there's a single Nvidia GPU: only the first Nvidia
# entry reported by lspci is inspected, and the PCI domain is assumed to be
# 0000.
#
# The whole check runs as ONE shell command (note the trailing backslashes):
# each recipe line otherwise gets its own shell, so values could not be
# carried from one line to the next. Shell variables are written as $$name so
# Make passes a literal `$` through to the shell. Only POSIX sh constructs are
# used because Make's default shell is /bin/sh.
#
# Fails (exit 1) when no Nvidia device is found, when the numa_node file
# cannot be read, or when the reported node is not 0.
precheck:
	@device="$$(lspci | grep -i nvidia | head -n 1 | awk '{ print $$1 }')"; \
	if [ -z "$$device" ]; then \
		echo "No Nvidia device found by lspci."; \
		exit 1; \
	fi; \
	file="/sys/bus/pci/devices/0000:$$device/numa_node"; \
	numa_state="$$(cat "$$file")" || exit 1; \
	if [ "$$numa_state" -ne 0 ]; then \
		echo "NUMA is not connected to your GPU. Try:"; \
		echo " echo 0 | sudo tee $$file"; \
		exit 1; \
	fi
|
||||
|
||||
|
||||
.PHONY: default up run down build precheck
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -6,12 +6,14 @@ services:
|
|||
build:
|
||||
context: ./jupyter
|
||||
dockerfile: Dockerfile
|
||||
privileged: true
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 0.0.0.0:9001:9001
|
||||
- 0.0.0.0:6006:6006 # for TensorBoard
|
||||
volumes:
|
||||
- ./notebooks:/notebooks
|
||||
- ./jupyter/overrides.json:/opt/conda/share/jupyter/lab/settings/overrides.json
|
||||
- ./jupyter/overrides.json:/usr/local/share/jupyter/lab/settings/overrides.json
|
||||
environment:
|
||||
- JUPYTER_TOKEN=12345
|
||||
devices:
|
||||
|
|
|
|||
|
|
@ -1,35 +1,71 @@
|
|||
|
||||
FROM continuumio/miniconda3
|
||||
FROM nvcr.io/nvidia/tensorflow:24.01-tf2-py3
|
||||
|
||||
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
# get miniconda3 installed
|
||||
# cribbed from https://github.com/ContinuumIO/docker-images/blob/main/miniconda3/debian/Dockerfile
|
||||
RUN apt-get update -q && \
|
||||
apt-get install -q -y --no-install-recommends \
|
||||
bzip2 \
|
||||
ca-certificates \
|
||||
git \
|
||||
libglib2.0-0 \
|
||||
libsm6 \
|
||||
libxext6 \
|
||||
libxrender1 \
|
||||
mercurial \
|
||||
openssh-client \
|
||||
procps \
|
||||
subversion \
|
||||
wget \
|
||||
curl \
|
||||
&& apt-get clean \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN ls /usr/local/lib/python3.10/dist-packages && true
|
||||
RUN /bin/false
|
||||
|
||||
|
||||
ENV PATH /opt/conda/bin:$PATH
|
||||
ARG CONDA_VERSION=py311_23.11.0-1
|
||||
|
||||
RUN cd /tmp \
|
||||
&& curl "https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh" -o miniconda.sh \
|
||||
&& mkdir -p /opt \
|
||||
&& bash miniconda.sh -b -p /opt/conda \
|
||||
&& rm miniconda.sh \
|
||||
&& ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh \
|
||||
&& echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc \
|
||||
&& echo "conda activate base" >> ~/.bashrc \
|
||||
&& find /opt/conda/ -follow -type f -name '*.a' -delete \
|
||||
&& find /opt/conda/ -follow -type f -name '*.js.map' -delete \
|
||||
&& /opt/conda/bin/conda clean -afy
|
||||
|
||||
|
||||
|
||||
RUN conda update -y -n base -c conda-forge conda
|
||||
|
||||
RUN conda create -y --name jupyter python=3.10
|
||||
RUN conda create -y --name jupyter
|
||||
|
||||
# CUDA toolkit 12.0 is not available in conda-forge, and my host OS has 12.0
|
||||
COPY cuda.deb /tmp
|
||||
RUN dpkg -i /tmp/cuda.deb && rm /tmp/cuda.deb
|
||||
|
||||
RUN CONDA_OVERRIDE_CUDA=12.0 \
|
||||
conda install -n jupyter --quiet -y -c conda-forge \
|
||||
jupyterlab \
|
||||
RUN conda run --no-capture-output -n jupyter \
|
||||
pip3 install --user --force-reinstall --ignore-installed \
|
||||
tensorflow[with-gpu] \
|
||||
keras \
|
||||
cudnn \
|
||||
keras-tuner \
|
||||
numpy \
|
||||
h5py \
|
||||
tensorflow=2.15 \
|
||||
&& /bin/true # only added to make the installed package lines consistent
|
||||
|
||||
RUN conda install -n jupyter --quiet -y -c conda-forge \
|
||||
RUN pip3 install \
|
||||
pandas \
|
||||
librosa \
|
||||
matplotlib \
|
||||
pyarrow \
|
||||
pydot \
|
||||
pillow \
|
||||
keras-tuner \
|
||||
&& /bin/true # as above
|
||||
|
||||
SHELL ["conda", "run", "-n", "jupyter", "/bin/bash", "-c"]
|
||||
ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "jupyter" ]
|
||||
CMD ["jupyter", "lab", "--ip", "0.0.0.0", "--port", "9001", "--no-browser", "--allow-root", "--LabApp.token=''", "--notebook-dir=/notebooks", "--ResourceUseDisplay.track_cpu_percent=True" ]
|
||||
ENV LD_LIBRARY_PATH /usr/local/cuda-12.0/compat:/usr/local/cuda-12.0/targets/x86_64-linux/lib/:$LD_LIBRARY_PATH
|
||||
|
||||
CMD ["jupyter", "lab", "--ip", "0.0.0.0", "--port", "9001", "--no-browser", "--allow-root", "--LabApp.token=''", "--notebook-dir=/notebooks", "--ResourceUseDisplay.track_cpu_percent=True" ]
|
||||
|
|
|
|||
Loading…
Reference in a new issue