diff --git a/Makefile b/Makefile
index 1a4d1d9..6007c4d 100644
--- a/Makefile
+++ b/Makefile
@@ -7,16 +7,31 @@ up: run
 run: build
 	docker compose up
 
-build: jupyter/Dockerfile jupyter/cuda.deb
-	docker compose build
-
-jupyter/cuda.deb:
-	curl https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-debian11-12-0-local_12.0.0-525.60.13-1_amd64.deb -o cuda.deb
+build: jupyter/Dockerfile
+	docker compose build --progress plain
 
 down:
 	docker compose down
 
 
 
-
-.PHONY: default up run down build
+# Verify the host GPU's NUMA node before starting the stack.
+# All of this assumes there's a single Nvidia GPU.
+# The check is one chained shell command because every recipe line runs in its
+# own shell, so make-style `VAR := ...` lines cannot be used inside a recipe.
+precheck:
+	@device=$$(lspci | grep -i nvidia | head -n 1 | awk '{ print $$1 }'); \
+	if [ -z "$$device" ]; then \
+		echo "No Nvidia GPU found by lspci."; \
+		exit 1; \
+	fi; \
+	file="/sys/bus/pci/devices/0000:$$device/numa_node"; \
+	numa_state=$$(cat "$$file") || exit 1; \
+	if [ "$$numa_state" -ne 0 ]; then \
+		echo "NUMA is not connected to your GPU. Try:"; \
+		echo "  echo 0 | sudo tee $$file"; \
+		exit 1; \
+	fi
+
+
+.PHONY: default up run down build precheck
diff --git a/docker-compose.yaml b/docker-compose.yaml
index acd61f6..77179c7 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -6,12 +6,14 @@ services:
     build:
       context: ./jupyter
       dockerfile: Dockerfile
+    privileged: true
     restart: unless-stopped
     ports:
       - 0.0.0.0:9001:9001
+      - 0.0.0.0:6006:6006 # for TensorBoard
     volumes:
       - ./notebooks:/notebooks
-      - ./jupyter/overrides.json:/opt/conda/share/jupyter/lab/settings/overrides.json
+      - ./jupyter/overrides.json:/usr/local/share/jupyter/lab/settings/overrides.json
     environment:
       - JUPYTER_TOKEN=12345
     devices:
diff --git a/jupyter/Dockerfile b/jupyter/Dockerfile
index 3be3bc1..852afba 100644
--- a/jupyter/Dockerfile
+++ b/jupyter/Dockerfile
@@ -1,35 +1,71 @@
-FROM continuumio/miniconda3
+FROM nvcr.io/nvidia/tensorflow:24.01-tf2-py3
+
+
+# Build-time only, so apt stays non-interactive without changing the runtime env.
+ARG DEBIAN_FRONTEND=noninteractive
+
+# get miniconda3 installed
+# cribbed from https://github.com/ContinuumIO/docker-images/blob/main/miniconda3/debian/Dockerfile
+RUN apt-get update -q && \
+    apt-get install -q -y --no-install-recommends \
+        bzip2 \
+        ca-certificates \
+        curl \
+        git \
+        libglib2.0-0 \
+        libsm6 \
+        libxext6 \
+        libxrender1 \
+        mercurial \
+        openssh-client \
+        procps \
+        subversion \
+        wget \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV PATH=/opt/conda/bin:$PATH
+ARG CONDA_VERSION=py311_23.11.0-1
+
+RUN cd /tmp \
+    && curl -fsSL "https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh" -o miniconda.sh \
+    && mkdir -p /opt \
+    && bash miniconda.sh -b -p /opt/conda \
+    && rm miniconda.sh \
+    && ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh \
+    && echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc \
+    && echo "conda activate base" >> ~/.bashrc \
+    && find /opt/conda/ -follow -type f -name '*.a' -delete \
+    && find /opt/conda/ -follow -type f -name '*.js.map' -delete \
+    && /opt/conda/bin/conda clean -afy
+
+
 
 RUN conda update -y -n base -c conda-forge conda
-RUN conda create -y --name jupyter python=3.10
+RUN conda create -y --name jupyter
 
 
-# CUDA toolkit 12.0 is not available in conda-forge, and my host OS has 12.0
-COPY cuda.deb /tmp
-RUN dpkg -i /tmp/cuda.deb && rm /tmp/cuda.deb
-
-RUN CONDA_OVERRIDE_CUDA=12.0 \
-    conda install -n jupyter --quiet -y -c conda-forge \
-    jupyterlab \
+# `and-cuda` is the TensorFlow extra that pulls in the CUDA libraries; quoted so the shell cannot glob it.
+RUN conda run --no-capture-output -n jupyter \
+    pip3 install --no-cache-dir --user --force-reinstall --ignore-installed \
+    'tensorflow[and-cuda]' \
     keras \
-    cudnn \
     keras-tuner \
     numpy \
     h5py \
-    tensorflow=2.15 \
     && /bin/true # only added to make the installed package lines consistent
 
 
-RUN conda install -n jupyter --quiet -y -c conda-forge \
+# NOTE(review): /opt/conda/bin is first on PATH, so this pip3 may be conda's rather than the base image's; confirm.
+RUN pip3 install --no-cache-dir \
     pandas \
     librosa \
     matplotlib \
-    pyarrow \
-    pydot \
     pillow \
+    keras-tuner \
     && /bin/true # as above
 
 
-SHELL ["conda", "run", "-n", "jupyter", "/bin/bash", "-c"]
-ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "jupyter" ]
-CMD ["jupyter", "lab", "--ip", "0.0.0.0", "--port", "9001", "--no-browser", "--allow-root", "--LabApp.token=''", "--notebook-dir=/notebooks", "--ResourceUseDisplay.track_cpu_percent=True" ]
+# NOTE(review): these paths hard-code CUDA 12.0; confirm they exist in this base image.
+ENV LD_LIBRARY_PATH=/usr/local/cuda-12.0/compat:/usr/local/cuda-12.0/targets/x86_64-linux/lib/:$LD_LIBRARY_PATH
+CMD ["jupyter", "lab", "--ip", "0.0.0.0", "--port", "9001", "--no-browser", "--allow-root", "--LabApp.token=''", "--notebook-dir=/notebooks", "--ResourceUseDisplay.track_cpu_percent=True" ]