commit 388e0fa8be6645b06f3bae663afdb8955720e3ea
Author: Erik Stambaugh
Date:   Mon Feb 19 19:48:10 2024 -0800

    Initial version -- can create a jupyter lab container

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0251df0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+notebooks
+.*.sw*
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..96b2da6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,20 @@
+# Convenience targets wrapping `docker compose` for the Jupyter Lab container.
+.PHONY: default up run down build
+
+# Bare `make` is the same as `make up`.
+default: up
+
+# Alias for `run`.
+up: run
+
+# Build the image, then start the stack in the foreground.
+run: build
+	docker compose up
+
+# Requires both input files to exist; phony, so the build runs every time.
+build: docker-compose.yaml jupyterlab.Dockerfile
+	docker compose build
+
+# Tear the stack down.
+down:
+	docker compose down
diff --git a/ROADMAP.md b/ROADMAP.md
new file mode 100644
index 0000000..a685cd6
--- /dev/null
+++ b/ROADMAP.md
@@ -0,0 +1,89 @@
+
+## the basic path
+
+- [X] make a jupyterlab docker container!
+  - [ ] volume mount for notebooks
+  - [ ] volume for training data?
+
+- [ ] get a model working in jupyter notebook
+- [ ] make a dockerfile to set up the base image
+- [ ] script to train and validate the model, packaging up the trained model alongside a report of its accuracy etc.
+  - [ ] include the package requirements in the artifact
+
+- [ ] taking that trained model and putting it in a flask or fastAPI container
+
+- [ ] how about using my laptop-docker-compose pytest script thing? that's pretty great for what i need
+
+## next steps
+
+- [ ] get the data into redis for training -- or another data store?
+- [ ] how about a clean path from jupyter to docker?
+- [ ] CI/CD: get it working with git and webhooks
+- [ ] git workflow with webhooks
+  - [ ] what is a good project model? here's a guess:
+    - base dockerfiles for conda and build scripts (?)
+    - model definition
+    - dataset definitions
+    - rest API
+    - test container
+- [ ] whylogs to track model drift?
+
+## questions
+
+
+- [ ] how do we get from jupyter to docker? what's a clean path?
+- [ ] what's a good practice for making training data available? Just have a redis container?
+
+- [ ] does it make sense to have each component be a separate CI/CD managed project?
+  - data ingest (for each dataset?)
+  - feature extraction
+  - labeling
+  - jupyter notebook for experimentation
+  - converter for jupyter -> app model (or do we just rely on people copy-pasting)
+  - model trainer (+verification?)
+  - [ ] maybe we just define a production application and worry about the rest later?
+    - model + API (or messaging system)
+    -
+
+- [ ] what's a good way to get project dependencies into a jupyter container?
+
+- [ ] does a setup like above help with parameter optimization?
+  - [ ] should an optimizer be its own containerized service?
+
+- [ ] how can we reuse feature extraction for regular model input?
+  - [ ] should a feature extractor be a service?
+
+
+## What needs doing?
+
+- [ ] step one is writing out all these steps!
+
+- [ ] docker image build pipeline:
+  - [ ] miniconda image
+  - [ ] how do we pull in the requirements for a particular model?
+    - honestly just ship a Dockerfile alongside the model
+    - but it needs to pull from a manifest that an individual developer using something like jupyter would use
+  - [ ] verify this part of the pipeline according to the training materials
+
+- [ ] feature extraction
+
+- [ ] training phase
+
+
+- [ ] packaging up a pre-trained model artifact
+
+- [ ] CI/CD
+  - [ ] webhook plugin for gitea? https://github.com/adnanh/webhook
+
+
+- [ ] missing from the course????
+  - [ ] training data store!
+  - [ ] labeling phase (is that the right term?) -- unattended?
+  - [ ] feature extraction phase
+
+## refer:
+
+- https://github.com/docker-science/cookiecutter-docker-science?tab=readme-ov-file
+- https://docker-science.github.io/
+
+
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000..1a0172b
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,15 @@
+---
+version: "3.5"
+services:
+  jupyter:
+    container_name: jupyter
+    build:
+      context: .
+      dockerfile: jupyterlab.Dockerfile
+    restart: unless-stopped
+    ports:
+      - 0.0.0.0:9001:9001
+    volumes:
+      - ./notebooks:/notebooks
+
+
diff --git a/jupyterlab.Dockerfile b/jupyterlab.Dockerfile
new file mode 100644
index 0000000..ad63e9a
--- /dev/null
+++ b/jupyterlab.Dockerfile
@@ -0,0 +1,33 @@
+# Jupyter Lab image. docker-compose.yaml bind-mounts ./notebooks at /notebooks
+# and publishes port 9001.
+
+# Explicit tag so rebuilds are reproducible; an untagged FROM floats on :latest.
+FROM continuumio/miniconda3:23.10.0-1
+
+# Starting point: the plain docker-run equivalent (classic notebook on 8888).
+# docker run -i -t -p 8888:8888 continuumio/miniconda3 /bin/bash \
+# -c "/opt/conda/bin/conda install jupyter -y --quiet && mkdir \
+# /opt/notebooks && /opt/conda/bin/jupyter notebook \
+# --notebook-dir=/opt/notebooks --ip='*' --port=8888 \
+# --no-browser --allow-root"
+
+# Create /notebooks, the directory the server is started in, so the image also
+# runs without the compose bind mount. Clean conda's package cache in the same
+# layer so it does not end up in the image.
+RUN /opt/conda/bin/conda install jupyterlab -y --quiet \
+    && /opt/conda/bin/conda clean --all --yes \
+    && mkdir -p /notebooks
+
+# Documentation only; docker-compose.yaml does the actual publishing.
+EXPOSE 9001
+
+# Exec form: jupyter is PID 1 and gets SIGTERM from `docker stop` directly.
+# No shell parses this, so the '*' is passed literally without quoting.
+# NOTE(review): still runs as root (--allow-root); moving to a non-root USER
+# needs the ownership of the ./notebooks bind mount sorted out first.
+CMD ["/opt/conda/bin/jupyter", "lab", \
+     "--notebook-dir=/notebooks", \
+     "--ip=*", \
+     "--port=9001", \
+     "--no-browser", \
+     "--allow-root"]