diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..e6990cd3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,23 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug, help wanted +assignees: '' +--- + +## 🐛 Bug + + + +### To Reproduce + + + +### Expected behavior + + + +### Additional context + + diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..d999a7b8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,19 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' +--- + +## 🚀 Feature + + + +### Motivation + + + +### Additional context + + diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..ef82380f --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.pyc +.idea/ +.vscode +.mypy_cache +.generated \ No newline at end of file diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 4bec6afa..00000000 --- a/Dockerfile +++ /dev/null @@ -1,505 +0,0 @@ -FROM gcr.io/kaggle-images/python-tensorflow-whl:1.11.0-py36 as tensorflow_whl -FROM continuumio/anaconda3:5.2.0 - -ADD clean-layer.sh /tmp/clean-layer.sh -ADD patches/ /tmp/patches/ -ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl - -# This is necessary for apt to access HTTPS sources -RUN apt-get update && \ - apt-get install apt-transport-https && \ - /tmp/clean-layer.sh - - # Use a fixed apt-get repo to stop intermittent failures due to flaky httpredir connections, - # as described by Lionel Chan at http://stackoverflow.com/a/37426929/5881346 -RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list && \ - apt-get update && apt-get install -y build-essential unzip && \ - # https://stackoverflow.com/a/46498173 - conda update -y conda && conda update -y python && \ - pip install --upgrade pip && \ - apt-get -y install cmake && \ - /tmp/clean-layer.sh - -# Tensorflow doesn't support python 3.7 yet. See https://github.com/tensorflow/tensorflow/issues/20517 -# Fix to install tf 1.10:: Downgrade python 3.7->3.6.6 and downgrade Pandas 0.23.3->0.23.2 -RUN conda install -y python=3.6.6 && \ - pip install pandas==0.23.2 && \ - # Another fix for TF 1.10 https://github.com/tensorflow/tensorflow/issues/21518 - pip install keras_applications==1.0.4 --no-deps && \ - pip install keras_preprocessing==1.0.2 --no-deps && \ - /tmp/clean-layer.sh - -# The anaconda base image includes outdated versions of these packages. Update them to include the latest version. 
-RUN pip install --upgrade seaborn python-dateutil dask && \ - pip install pyyaml joblib pytagcloud husl geopy ml_metrics mne pyshp && \ - conda install -y -c conda-forge spacy && python -m spacy download en && \ - python -m spacy download en_core_web_lg && \ - # The apt-get version of imagemagick is out of date and has compatibility issues, so we build from source - apt-get -y install dbus fontconfig fontconfig-config fonts-dejavu-core fonts-droid-fallback ghostscript gsfonts hicolor-icon-theme \ - libavahi-client3 libavahi-common-data libavahi-common3 libcairo2 libcap-ng0 libcroco3 \ - libcups2 libcupsfilters1 libcupsimage2 libdatrie1 libdbus-1-3 libdjvulibre-text libdjvulibre21 libfftw3-double3 libfontconfig1 \ - libfreetype6 libgdk-pixbuf2.0-0 libgdk-pixbuf2.0-common libgomp1 libgraphite2-3 libgs9 libgs9-common libharfbuzz0b libijs-0.35 \ - libilmbase12 libjbig0 libjbig2dec0 libjpeg62-turbo liblcms2-2 liblqr-1-0 libltdl7 libmagickcore-6.q16-3 \ - libmagickcore-6.q16-3-extra libmagickwand-6.q16-3 libnetpbm10 libopenexr22 libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 \ - libpaper-utils libpaper1 libpixman-1-0 libpng16-16 librsvg2-2 librsvg2-common libthai-data libthai0 libtiff5 libwmf0.2-7 \ - libxcb-render0 libxcb-shm0 netpbm poppler-data p7zip-full && \ - cd /usr/local/src && \ - wget --no-verbose https://imagemagick.org/download/ImageMagick.tar.gz && \ - tar xzf ImageMagick.tar.gz && cd `ls -d ImageMagick-*` && pwd && ls -al && ./configure && \ - make -j $(nproc) && make install && \ - /tmp/clean-layer.sh - -# Install tensorflow from a pre-built wheel -COPY --from=tensorflow_whl /tmp/tensorflow_cpu/*.whl /tmp/tensorflow_cpu/ -RUN pip install /tmp/tensorflow_cpu/tensorflow*.whl && \ - rm -rf /tmp/tensorflow_cpu && \ - /tmp/clean-layer.sh - -RUN apt-get install -y libfreetype6-dev && \ - apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing && \ - pip install gensim && \ - pip install textblob && \ - pip install wordcloud && \ - conda install -y -c conda-forge python-igraph && \ - pip install xgboost && \ - pip install lightgbm && \ - pip install git+git://github.com/Lasagne/Lasagne.git && \ - #keras - cd /usr/local/src && mkdir keras && cd keras && \ - git clone --depth 1 https://github.com/fchollet/keras.git && \ - cd keras && python setup.py install && \ - #keras-rl - cd /usr/local/src && mkdir keras-rl && cd keras-rl && \ - git clone --depth 1 https://github.com/matthiasplappert/keras-rl.git && \ - cd keras-rl && python setup.py install && \ - #keras-rcnn - pip install git+https://github.com/broadinstitute/keras-rcnn && \ - #neon - cd /usr/local/src && \ - git clone --depth 1 https://github.com/NervanaSystems/neon.git && \ - cd neon && pip install . 
&& \ - #nolearn - cd /usr/local/src && mkdir nolearn && cd nolearn && \ - git clone --depth 1 https://github.com/dnouri/nolearn.git && cd nolearn && \ - echo "x" > README.rst && echo "x" > CHANGES.rst && \ - python setup.py install && \ - # Dev branch of Theano - pip install git+git://github.com/Theano/Theano.git --upgrade --no-deps && \ - # put theano compiledir inside /tmp (it needs to be in writable dir) - printf "[global]\nbase_compiledir = /tmp/.theano\n" > /.theanorc && \ - cd /usr/local/src && git clone --depth 1 https://github.com/pybrain/pybrain && \ - cd pybrain && python setup.py install && \ - # Base ATLAS - apt-get install -y libatlas-base-dev && \ - cd /usr/local/src && git clone --depth 1 https://github.com/ztane/python-Levenshtein && \ - cd python-Levenshtein && python setup.py install && \ - pip install hep_ml && \ - # chainer - pip install chainer && \ - # NLTK Project datasets - mkdir -p /usr/share/nltk_data && \ - # NLTK Downloader no longer continues smoothly after an error, so we explicitly list - # the corpuses that work - python -m nltk.downloader -d /usr/share/nltk_data abc alpino averaged_perceptron_tagger \ - basque_grammars biocreative_ppi bllip_wsj_no_aux \ - book_grammars brown brown_tei cess_cat cess_esp chat80 city_database cmudict \ - comtrans conll2000 conll2002 conll2007 crubadan dependency_treebank \ - europarl_raw floresta gazetteers genesis gutenberg \ - ieer inaugural indian jeita kimmo knbc large_grammars lin_thesaurus mac_morpho machado \ - masc_tagged maxent_ne_chunker maxent_treebank_pos_tagger moses_sample movie_reviews \ - mte_teip5 names nps_chat omw opinion_lexicon paradigms \ - pil pl196x porter_test ppattach problem_reports product_reviews_1 product_reviews_2 propbank \ - pros_cons ptb punkt qc reuters rslp rte sample_grammars semcor senseval sentence_polarity \ - sentiwordnet shakespeare sinica_treebank smultron snowball_data spanish_grammars \ - state_union stopwords subjectivity swadesh switchboard tagsets timit toolbox treebank \ - twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \ - vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe && \ - # Stop-words - pip install stop-words && \ - /tmp/clean-layer.sh - -# Make sure the dynamic linker finds the right libstdc++ -ENV LD_LIBRARY_PATH=/opt/conda/lib - -RUN apt-get -y install zlib1g-dev liblcms2-dev libwebp-dev libgeos-dev && \ - pip install matplotlib==2.2.3 && \ - pip install pyshp && \ - pip install pyproj && \ - cd /usr/local/src && git clone https://github.com/matplotlib/basemap.git && \ - cd basemap && \ - git checkout v1.1.0 && \ - python setup.py install && \ - pip install basemap --no-binary basemap && \ - # sasl is apparently an ibis dependency - apt-get -y install libsasl2-dev && \ - # ...as is psycopg2 - apt-get install -y libpq-dev && \ - pip install ibis-framework && \ - # Cartopy plus dependencies - yes | conda install proj4 && \ - pip install packaging && \ - pip install shapely && \ - pip install cartopy && \ - # MXNet - pip install mxnet && \ - # h2o (requires java) - # Upgrade numpy with pip to avoid install errors - pip install --upgrade numpy && \ - # requires java - apt-get install -y default-jdk && \ - cd /usr/local/src && mkdir h2o && cd h2o && \ - wget --no-verbose http://h2o-release.s3.amazonaws.com/h2o/latest_stable -O latest && \ - wget --no-verbose --no-check-certificate -i latest -O h2o.zip && rm latest && \ - unzip h2o.zip && rm h2o.zip && cp h2o-*/h2o.jar . && \ - pip install `find . 
-name "*whl"` && \ - # Keras setup - # Keras likes to add a config file in a custom directory when it's - # first imported. This doesn't work with our read-only filesystem, so we - # have it done now - python -c "from keras.models import Sequential" && \ - # Switch to TF backend - sed -i 's/theano/tensorflow/' /root/.keras/keras.json && \ - # Re-run it to flush any more disk writes - python -c "from keras.models import Sequential; from keras import backend; print(backend._BACKEND)" && \ - # Keras reverts to /tmp from ~ when it detects a read-only file system - mkdir -p /tmp/.keras && cp /root/.keras/keras.json /tmp/.keras && \ - /tmp/clean-layer.sh - - # scikit-learn dependencies -RUN pip install scipy && \ - pip install scikit-learn && \ - # HDF5 support - conda install h5py && \ - pip install biopython && \ - # PUDB, for local debugging convenience - pip install pudb && \ - # Imbalanced-learn - cd /usr/local/src && git clone https://github.com/scikit-learn-contrib/imbalanced-learn.git && \ - cd imbalanced-learn && python setup.py install && \ - # Convex Optimization library - # Latest version fails to install, see https://github.com/cvxopt/cvxopt/issues/77 - # and https://github.com/cvxopt/cvxopt/issues/80 - # pip install cvxopt && \ - # Profiling and other utilities - pip install line_profiler && \ - pip install orderedmultidict && \ - pip install smhasher && \ - conda install -y -c bokeh bokeh && \ - pip install datashader && \ - # Boruta (python implementation) - cd /usr/local/src && git clone https://github.com/danielhomola/boruta_py.git && \ - cd boruta_py && python setup.py install && \ - cd /usr/local/src && git clone git://github.com/nicolashennetier/pyeconometrics.git && \ - cd pyeconometrics && python setup.py install && \ - apt-get install -y graphviz && pip install graphviz && \ - # Pandoc is a dependency of deap - apt-get install -y pandoc && \ - cd /usr/local/src && git clone git://github.com/scikit-learn-contrib/py-earth.git && \ - cd py-earth && python setup.py install && \ - pip install essentia && \ - # PyTorch - export CXXFLAGS="-std=c++11" && \ - export CFLAGS="-std=c99" && \ - conda install -y pytorch-cpu torchvision-cpu -c pytorch && \ - # PyTorch Audio - apt-get install -y sox libsox-dev libsox-fmt-all && \ - pip install cffi && \ - cd /usr/local/src && git clone https://github.com/pytorch/audio && cd audio && python setup.py install && \ - /tmp/clean-layer.sh - -# vtk with dependencies -RUN apt-get install -y libgl1-mesa-glx && \ - pip install vtk && \ - # xvfbwrapper with dependencies - apt-get install -y xvfb && \ - pip install xvfbwrapper && \ - /tmp/clean-layer.sh - -RUN pip install --upgrade mpld3 && \ - pip install mplleaflet && \ - pip install gpxpy && \ - pip install arrow && \ - pip install nilearn && \ - pip install nibabel && \ - pip install pronouncing && \ - pip install markovify && \ - pip install rf_perm_feat_import && \ - pip install imgaug && \ - pip install preprocessing && \ - pip install Baker && \ - pip install path.py && \ - pip install Geohash && \ - # https://github.com/vinsci/geohash/issues/4 - sed -i -- 's/geohash/.geohash/g' /opt/conda/lib/python3.6/site-packages/Geohash/__init__.py && \ - pip install deap && \ - pip install tpot && \ - pip install scikit-optimize && \ - pip install haversine && \ - pip install toolz cytoolz && \ - pip install sacred && \ - pip install plotly && \ - pip install git+https://github.com/nicta/dora.git && \ - pip install git+https://github.com/hyperopt/hyperopt.git && \ - # tflean. 
Deep learning library featuring a higher-level API for TensorFlow. http://tflearn.org - pip install git+https://github.com/tflearn/tflearn.git && \ - pip install fitter && \ - pip install langid && \ - # Delorean. Useful for dealing with datetime - pip install delorean && \ - pip install trueskill && \ - pip install heamy && \ - pip install vida && \ - # Useful data exploration libraries (for missing data and generating reports) - pip install missingno && \ - pip install pandas-profiling && \ - pip install s2sphere && \ - pip install git+https://github.com/fmfn/BayesianOptimization.git && \ - pip install matplotlib-venn && \ - pip install pyldavis && \ - # Pattern not yet Py3 compatible... - # pip install pattern && \ - pip install git+git://github.com/rasbt/mlxtend.git#egg=mlxtend && \ - pip install altair && \ - pip install pystan && \ - pip install ImageHash && \ - conda install -y ecos && \ - conda install -y CVXcanon && \ - /tmp/clean-layer.sh - -RUN pip install fancyimpute && \ - pip install git+https://github.com/pymc-devs/pymc3 && \ - pip install tifffile && \ - pip install spectral && \ - pip install descartes && \ - pip install geojson && \ - pip install pysal && \ - pip install pyflux && \ - pip install terminalplot && \ - pip install raccoon && \ - pip install pydicom && \ - pip install wavio && \ - pip install SimpleITK && \ - pip install hmmlearn && \ - pip install bayespy && \ - pip install gplearn && \ - pip install PyAstronomy && \ - pip install squarify && \ - pip install fuzzywuzzy && \ - pip install python-louvain && \ - pip install pyexcel-ods && \ - pip install sklearn-pandas && \ - pip install stemming && \ - pip install fbprophet && \ - pip install holoviews && \ - pip install geoviews && \ - pip install hypertools && \ - # Nxviz has been causing an installation issue by trying unsuccessfully to remove setuptools. - #pip install nxviz && \ - pip install py_stringsimjoin && \ - pip install speedml && \ - pip install nibabel && \ - pip install mlens && \ - pip install scikit-multilearn && \ - pip install cleverhans && \ - pip install leven && \ - pip install catboost && \ - #cd /usr/local/src && git clone --depth=1 https://github.com/AxeldeRomblay/MLBox && cd MLBox/python-package && python setup.py install && \ - pip install fastFM && \ - pip install lightfm && \ - pip install paramnb && \ - pip install folium && \ - pip install scikit-plot && \ - # 0.15.0 is still unstable. - pip install dipy==0.14.0 && \ - # plotnine 0.5 is depending on matplotlib >= 3.0 which is not compatible with basemap. - # once basemap support matplotlib, we can unpin this package. 
- pip install plotnine==0.4.0 && \ - pip install git+https://github.com/dvaida/hallucinate.git && \ - pip install scikit-surprise && \ - pip install pymongo && \ - pip install edward && \ - pip install geoplot && \ - pip install eli5 && \ - pip install implicit && \ - pip install dask-ml[xgboost] && \ - /tmp/clean-layer.sh - -RUN pip install kmeans-smote --no-dependencies && \ - # Add google PAIR-code Facets - cd /opt/ && git clone https://github.com/PAIR-code/facets && cd facets/ && jupyter nbextension install facets-dist/ --user && \ - export PYTHONPATH=$PYTHONPATH:/opt/facets/facets_overview/python/ && \ - pip install --no-dependencies ethnicolr && \ - pip install tensorpack && \ - pip install pycountry && pip install iso3166 && \ - pip install pydash && \ - pip install kmodes --no-dependencies && \ - pip install librosa && \ - pip install polyglot && \ - pip install mmh3 && \ - pip install fbpca && \ - pip install sentencepiece && \ - pip install cufflinks && \ - pip install glmnet_py && \ - pip install lime && \ - pip install memory_profiler && \ - /tmp/clean-layer.sh - -# install cython & cysignals before pyfasttext -RUN pip install --upgrade cython && \ - pip install --upgrade cysignals && \ - pip install pyfasttext && \ - pip install ktext && \ - pip install git+git://github.com/facebookresearch/fastText.git && \ - apt-get install -y libhunspell-dev && pip install hunspell && \ - pip install annoy && \ - pip install category_encoders && \ - pip install google-cloud-bigquery && \ - pip install ortools && \ - pip install scattertext && \ - # Pandas data reader - pip install pandas-datareader && \ - pip install pykoko && \ - pip install wordsegment && \ - pip install pyahocorasick && \ - pip install wordbatch && \ - pip install emoji && \ - # Add Japanese morphological analysis engine - pip install janome && \ - pip install wfdb && \ - pip install vecstack && \ - pip install sklearn-contrib-lightning && \ - # yellowbrick machine learning visualization library - pip install yellowbrick && \ - pip install mlcrate && \ - # Required to display Altair charts in Jupyter notebook - pip install vega3 && \ - jupyter nbextension install --sys-prefix --py vega3 && \ - /tmp/clean-layer.sh - -# Fast.ai and dependencies -RUN pip install bcolz && \ - pip install bleach && \ - pip install certifi && \ - pip install cycler && \ - pip install decorator && \ - pip install entrypoints && \ - pip install html5lib && \ - pip install ipykernel && \ - pip install ipython && \ - pip install ipython-genutils && \ - pip install ipywidgets && \ - pip install isoweek && \ - pip install jedi && \ - pip install Jinja2 && \ - pip install jsonschema && \ - pip install jupyter && \ - pip install jupyter-client && \ - pip install jupyter-console && \ - pip install jupyter-core && \ - pip install MarkupSafe && \ - pip install mistune && \ - pip install nbconvert && \ - pip install nbformat && \ - pip install notebook==5.5.0 && \ - pip install numpy && \ - pip install olefile && \ - pip install opencv-python && \ - pip install --upgrade pandas && \ - pip install pandas_summary && \ - pip install pandocfilters && \ - pip install pexpect && \ - pip install pickleshare && \ - pip install Pillow && \ - pip install ptyprocess && \ - pip install Pygments && \ - pip install pyparsing && \ - pip install python-dateutil==2.6.0 && \ - pip install pytz && \ - pip install PyYAML && \ - pip install pyzmq && \ - pip install qtconsole && \ - pip install simplegeneric && \ - pip install six && \ - pip install terminado && \ - pip 
install testpath && \ - pip install tornado && \ - pip install tqdm && \ - pip install traitlets && \ - pip install wcwidth && \ - pip install webencodings && \ - pip install widgetsnbextension && \ - # Latest version of pyarrow conflicts with pandas - # https://github.com/pandas-dev/pandas/issues/23053 - pip install pyarrow==0.10.0 && \ - pip install feather-format && \ - pip install fastai && \ - pip install torchtext && \ - /tmp/clean-layer.sh - - ########### - # - # NEW CONTRIBUTORS: - # Please add new pip/apt installs in this block. Don't forget a "&& \" at the end - # of all non-final lines. Thanks! - # - ########### - -RUN pip install flashtext && \ - pip install marisa-trie && \ - pip install pyemd && \ - pip install pyupset && \ - pip install pympler && \ - pip install s3fs && \ - pip install featuretools && \ - pip install -e git+https://github.com/SohierDane/BigQuery_Helper#egg=bq_helper && \ - pip install hpsklearn && \ - pip install keras-tqdm && \ - pip install git+https://github.com/Kaggle/learntools && \ - pip install kmapper && \ - pip install shap && \ - pip install ray && \ - pip install gym && \ - pip install tensorforce && \ - pip install pyarabic && \ - pip install conx && \ - pip install pandasql && \ - pip install trackml && \ - pip install tensorflow_hub && \ - pip install jieba && \ - pip install PDPbox && \ - pip install ggplot && \ - pip install cesium && \ - pip install rgf_python && \ - /tmp/clean-layer.sh - -# Pin Vowpal Wabbit v8.6.0 because 8.6.1 does not build or install successfully -RUN cd /usr/local/src && \ - git clone -b 8.6.0 https://github.com/JohnLangford/vowpal_wabbit.git && \ - ./vowpal_wabbit/python/conda_install.sh && \ - # Reinstall in non-editable mode (without the -e flag) - pip install vowpal_wabbit/python && \ - /tmp/clean-layer.sh - -# For Facets -ENV PYTHONPATH=$PYTHONPATH:/opt/facets/facets_overview/python/ -# For Theano with MKL -ENV MKL_THREADING_LAYER=GNU - -# Temporary fixes and patches - # Temporary patch for Dask getting downgraded, which breaks Keras -RUN pip install --upgrade dask && \ - # Stop jupyter nbconvert trying to rewrite its folder hierarchy - mkdir -p /root/.jupyter && touch /root/.jupyter/jupyter_nbconvert_config.py && touch /root/.jupyter/migrated && \ - mkdir -p /.jupyter && touch /.jupyter/jupyter_nbconvert_config.py && touch /.jupyter/migrated && \ - # Stop Matplotlib printing junk to the console on first load - sed -i "s/^.*Matplotlib is building the font cache using fc-list.*$/# Warning removed by Kaggle/g" /opt/conda/lib/python3.6/site-packages/matplotlib/font_manager.py && \ - # Make matplotlib output in Jupyter notebooks display correctly - mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/ipython_config.py && \ - /tmp/clean-layer.sh - -# Add BigQuery client proxy settings -ENV PYTHONUSERBASE "/root/.local" -ADD patches/sitecustomize.py /root/.local/lib/python3.6/site-packages/sitecustomize.py - -# Set backend for matplotlib -ENV MPLBACKEND "agg" diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl new file mode 100644 index 00000000..9d661201 --- /dev/null +++ b/Dockerfile.tmpl @@ -0,0 +1,164 @@ +{{ if eq .Accelerator "gpu" }} +FROM us-docker.pkg.dev/colab-images/public/runtime:release-colab-external_20260126-060048_RC00 +{{ else }} +FROM us-docker.pkg.dev/colab-images/public/cpu-runtime:release-colab-external_20260123-060023_RC00 +{{ end}} + +ADD kaggle_requirements.txt /kaggle_requirements.txt + +# Freeze existing requirements from base image for critical 
packages: +RUN pip freeze | grep -E 'tensorflow|keras|torch|jax' > /colab_requirements.txt + +# Merge requirements files: +RUN cat /colab_requirements.txt >> /requirements.txt +RUN cat /kaggle_requirements.txt >> /requirements.txt + +# Install Kaggle packages +RUN uv pip install --system --no-cache -r /requirements.txt + +# Install manual packages: +# b/183041606#comment5: the Kaggle data proxy doesn't support these APIs. If the library is missing, it falls back to using a regular BigQuery query to fetch data. +RUN uv pip uninstall --system --no-cache google-cloud-bigquery-storage + +# uv cannot install this in requirements.txt without --no-build-isolation +# to avoid affecting the larger build, we'll post-install it. +RUN uv pip install --no-build-isolation --no-cache --system "git+https://github.com/Kaggle/learntools" + +# b/404590350: Ray and torchtune have conflicting cli named `tune`. `ray` is not part of Colab's base image. Re-install `tune` to ensure the torchtune CLI is available by default. +# b/468367647: Unpin protobuf, version greater than v5.29.5 causes issues with numerous packages +RUN uv pip install --system --force-reinstall --no-cache --no-deps torchtune +RUN uv pip install --system --force-reinstall --no-cache "protobuf==5.29.5" + +# Adding non-package dependencies: +ADD clean-layer.sh /tmp/clean-layer.sh +ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl +ADD patches/template_conf.json /opt/kaggle/conf.json + +ARG PACKAGE_PATH=/usr/local/lib/python3.12/dist-packages + +# Install GPU-specific non-pip packages. +{{ if eq .Accelerator "gpu" }} +RUN uv pip install --system --no-cache "pycuda" +{{ end }} + + +# Use a fixed apt-get repo to stop intermittent failures due to flaky httpredir connections, +# as described by Lionel Chan at http://stackoverflow.com/a/37426929/5881346 +RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list && \ + apt-get update --allow-releaseinfo-change && \ + # Needed by lightGBM (GPU build) + # https://lightgbm.readthedocs.io/en/latest/GPU-Tutorial.html#build-lightgbm + apt-get install -y build-essential unzip cmake libboost-dev libboost-system-dev libboost-filesystem-dev p7zip-full && \ + # b/182601974: ssh client was removed from the base image but is required for packages such as stable-baselines. 
+ apt-get install -y openssh-client && \ + apt-get install -y graphviz && pip install graphviz && \ + /tmp/clean-layer.sh + +ADD patches/keras_internal.py \ + patches/keras_internal_test.py \ + $PACKAGE_PATH/tensorflow_decision_forests/keras/ + +RUN apt-get install -y libfreetype6-dev && \ + apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing && \ + /tmp/clean-layer.sh + +RUN mkdir -p /usr/share/nltk_data && \ + # NLTK Downloader no longer continues smoothly after an error, so we explicitly list + # the corpuses that work + python -m nltk.downloader -d /usr/share/nltk_data abc alpino averaged_perceptron_tagger \ + basque_grammars biocreative_ppi bllip_wsj_no_aux \ + book_grammars brown brown_tei cess_cat cess_esp chat80 city_database cmudict \ + comtrans conll2000 conll2002 conll2007 crubadan dependency_treebank \ + europarl_raw floresta gazetteers genesis gutenberg \ + ieer inaugural indian jeita kimmo knbc large_grammars lin_thesaurus mac_morpho machado \ + masc_tagged maxent_ne_chunker maxent_treebank_pos_tagger moses_sample movie_reviews \ + mte_teip5 names nps_chat omw opinion_lexicon paradigms \ + pil pl196x porter_test ppattach problem_reports product_reviews_1 product_reviews_2 propbank \ + pros_cons ptb punkt punkt_tab qc reuters rslp rte sample_grammars semcor senseval sentence_polarity \ + sentiwordnet shakespeare sinica_treebank smultron snowball_data spanish_grammars \ + state_union stopwords subjectivity swadesh switchboard tagsets timit toolbox treebank \ + twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \ + vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe + +RUN apt-get install -y git-lfs && \ + # vtk dependencies + apt-get install -y libgl1-mesa-glx && \ + # xvfbwrapper dependencies + apt-get install -y xvfb && \ + /tmp/clean-layer.sh + +# Download base easyocr models. +# https://github.com/JaidedAI/EasyOCR#usage +RUN mkdir -p /root/.EasyOCR/model && \ + wget --no-verbose "https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/latin_g2.zip" -O /root/.EasyOCR/model/latin.zip && \ + unzip /root/.EasyOCR/model/latin.zip -d /root/.EasyOCR/model/ && \ + rm /root/.EasyOCR/model/latin.zip && \ + wget --no-verbose "https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip" -O /root/.EasyOCR/model/english.zip && \ + unzip /root/.EasyOCR/model/english.zip -d /root/.EasyOCR/model/ && \ + rm /root/.EasyOCR/model/english.zip && \ + wget --no-verbose "https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip" -O /root/.EasyOCR/model/craft_mlt_25k.zip && \ + unzip /root/.EasyOCR/model/craft_mlt_25k.zip -d /root/.EasyOCR/model/ && \ + rm /root/.EasyOCR/model/craft_mlt_25k.zip && \ + /tmp/clean-layer.sh + +# Tesseract and some associated utility packages +RUN apt-get install tesseract-ocr -y && \ + /tmp/clean-layer.sh + +ENV TESSERACT_PATH=/usr/bin/tesseract \ + # For Facets, we also include an empty path to include $PWD. 
+ PYTHONPATH=:$PYTHONPATH:/opt/facets/facets_overview/python/ \ + # For Theano with MKL + MKL_THREADING_LAYER=GNU + +# Temporary fixes and patches +# Stop jupyter nbconvert trying to rewrite its folder hierarchy +RUN mkdir -p /root/.jupyter && touch /root/.jupyter/jupyter_nbconvert_config.py && touch /root/.jupyter/migrated && \ + mkdir -p /.jupyter && touch /.jupyter/jupyter_nbconvert_config.py && touch /.jupyter/migrated && \ + # Make matplotlib output in Jupyter notebooks display correctly + mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/ipython_config.py && \ + /tmp/clean-layer.sh + +# install imagemagick for wand +# https://docs.wand-py.org/en/latest/guide/install.html#install-imagemagick-on-debian-ubuntu +RUN apt-get install libmagickwand-dev && \ + /tmp/clean-layer.sh + +# Override default imagemagick policies +ADD patches/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml + +# Add Kaggle module resolver +ADD patches/kaggle_module_resolver.py $PACKAGE_PATH/tensorflow_hub/kaggle_module_resolver.py +RUN sed -i '/from tensorflow_hub import uncompressed_module_resolver/a from tensorflow_hub import kaggle_module_resolver' $PACKAGE_PATH/tensorflow_hub/config.py && \ + sed -i '/_install_default_resolvers()/a \ \ registry.resolver.add_implementation(kaggle_module_resolver.KaggleFileResolver())' $PACKAGE_PATH/tensorflow_hub/config.py + +# Add BigQuery client proxy settings +ENV PYTHONUSERBASE="/root/.local" +ADD patches/kaggle_gcp.py \ + patches/kaggle_secrets.py \ + patches/kaggle_session.py \ + patches/kaggle_web_client.py \ + patches/kaggle_datasets.py \ + $PACKAGE_PATH/ + +# Figure out why this is in a different place? +# Found by doing a export PYTHONVERBOSE=1 and then running python and checking for where it looked for it. +ADD patches/sitecustomize.py /usr/lib/python3.12/sitecustomize.py + +ARG GIT_COMMIT=unknown \ + BUILD_DATE=unknown + +LABEL git-commit=$GIT_COMMIT \ + build-date=$BUILD_DATE + +ENV GIT_COMMIT=${GIT_COMMIT} \ + BUILD_DATE=${BUILD_DATE} + +# Correlate current release with the git hash inside the kernel editor by running `!cat /etc/git_commit`. +RUN echo "$GIT_COMMIT" > /etc/git_commit && echo "$BUILD_DATE" > /etc/build_date + +{{ if eq .Accelerator "gpu" }} +# Add the CUDA home. +ENV CUDA_HOME=/usr/local/cuda +{{ end }} +ENTRYPOINT ["/usr/bin/env"] diff --git a/Jenkinsfile b/Jenkinsfile index b37db044..c4af03e6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,4 +1,4 @@ -String cron_string = BRANCH_NAME == "master" ? "H 12 * * 1-5" : "" +String cron_string = BRANCH_NAME == "main" ? 
"H 12 * * 1-5" : "" // Mon-Fri at noon UTC, 8am EST, 5am PDT pipeline { agent { label 'ephemeral-linux' } @@ -14,121 +14,186 @@ pipeline { GIT_COMMIT_SUBJECT = sh(returnStdout: true, script:"git log --format=%s -n 1 HEAD").trim() GIT_COMMIT_AUTHOR = sh(returnStdout: true, script:"git log --format='%an' -n 1 HEAD").trim() GIT_COMMIT_SUMMARY = "`` ${GIT_COMMIT_SUBJECT} - ${GIT_COMMIT_AUTHOR}" - SLACK_CHANNEL = sh(returnStdout: true, script: "if [[ \"${GIT_BRANCH}\" == \"master\" ]]; then echo \"#kernelops\"; else echo \"#builds\"; fi").trim() + MATTERMOST_CHANNEL = sh(returnStdout: true, script: "if [[ \"${GIT_BRANCH}\" == \"main\" ]]; then echo \"#kernelops\"; else echo \"#builds\"; fi").trim() + // Use dev branch names as tags, but replace '/' with '-' using sed since docker images don't support forward slash + PRETEST_TAG = sh(returnStdout: true, script: "if [[ \"${GIT_BRANCH}\" == \"main\" ]]; then echo \"ci-pretest\"; else echo \"${GIT_BRANCH}-pretest\" | sed 's/\\//-/g'; fi").trim() + STAGING_TAG = sh(returnStdout: true, script: "if [[ \"${GIT_BRANCH}\" == \"main\" ]]; then echo \"staging\"; else echo \"${GIT_BRANCH}-staging\" | sed 's/\\//-/g'; fi").trim() } stages { - stage('Docker CPU Build') { - steps { - slackSend color: 'none', message: "*<${env.BUILD_URL}console|${JOB_NAME} docker build>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL - sh '''#!/bin/bash - set -exo pipefail + stage('Build/Test/Diff') { + parallel { + stage('CPU') { + stages { + stage('Build CPU Image') { + options { + timeout(time: 120, unit: 'MINUTES') + } + steps { + sh '''#!/bin/bash + set -exo pipefail - ./build | ts - ''' - } - } + ./build | ts + ./push ${PRETEST_TAG} + ''' + } + } + stage('Diff CPU image') { + steps { + sh '''#!/bin/bash + set -exo pipefail - stage('Test CPU Image') { - steps { - slackSend color: 'none', message: "*<${env.BUILD_URL}console|${JOB_NAME} test image>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL - sh '''#!/bin/bash - set -exo pipefail + docker pull gcr.io/kaggle-images/python:${PRETEST_TAG} + ./diff --target gcr.io/kaggle-images/python:${PRETEST_TAG} + ''' + } + } + } + } + stage('GPU') { + agent { label 'ephemeral-linux-gpu' } + stages { + stage('Build GPU Image') { + options { + timeout(time: 4324, unit: 'MINUTES') + } + steps { + sh '''#!/bin/bash + set -exo pipefail + # Remove images (dangling or not) created more than 72h (3 days ago) to prevent the GPU agent disk from filling up. + # Note: CPU agents are ephemeral and do not need to have their disk cleaned up. + docker image prune --all --force --filter "until=72h" --filter "label=kaggle-lang=python" + # Remove any dangling images (no tags). + # All builds for the same branch uses the same tag. This means a subsequent build for the same branch + # will untag the previously built image which is safe to do. Builds for a single branch are performed + # serially. 
+ docker image prune -f + + ./build --gpu | ts + ./push --gpu ${PRETEST_TAG} + ''' + } + } + stage('Diff GPU Image') { + steps { + sh '''#!/bin/bash + set -exo pipefail - date - ./test - ''' - } - } + docker pull gcr.io/kaggle-private-byod/python:${PRETEST_TAG} + ./diff --gpu --target gcr.io/kaggle-private-byod/python:${PRETEST_TAG} + ''' + } + } + } + } + stage('TPU VM') { + agent { label 'ephemeral-linux' } + stages { + stage('Build TPU VM Image') { + options { + timeout(time: 60, unit: 'MINUTES') + } + steps { + sh '''#!/bin/bash + set -exo pipefail - stage('Push CPU Image') { - steps { - slackSend color: 'none', message: "*<${env.BUILD_URL}console|${JOB_NAME} pushing image>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL - sh '''#!/bin/bash - set -exo pipefail + ./tpu/build | ts + ./push --tpu ${PRETEST_TAG} + ''' + } + } + stage('Diff TPU VM Image') { + steps { + sh '''#!/bin/bash + set -exo pipefail - date - ./push staging - ''' - } - } - - stage('Docker GPU Build') { - // A GPU is not required to build this image. However, in our current setup, - // the default runtime is set to nvidia (as opposed to runc) and there - // is no option to specify a runtime for the `docker build` command. - // - // TODO(rosbo) don't set `nvidia` as the default runtime and use the - // `--runtime=nvidia` flag for the `docker run` command when GPU support is needed. - agent { label 'ephemeral-linux-gpu' } - steps { - slackSend color: 'none', message: "*<${env.BUILD_URL}console|${JOB_NAME} docker build>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL - sh '''#!/bin/bash - set -exo pipefail - docker image prune -a -f # remove previously built image to prevent disk from filling up - ./build --gpu | ts - ''' + docker pull gcr.io/kaggle-private-byod/python-tpuvm:${PRETEST_TAG} + ./diff --tpu --target gcr.io/kaggle-private-byod/python-tpuvm:${PRETEST_TAG} + ''' + } + } + } + } } } - stage('Test GPU Image') { - agent { label 'ephemeral-linux-gpu' } - steps { - slackSend color: 'none', message: "*<${env.BUILD_URL}console|${JOB_NAME} test image>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL - sh '''#!/bin/bash - set -exo pipefail + stage('Test') { + parallel { + stage('Test CPU Image') { + options { + timeout(time: 15, unit: 'MINUTES') + } + steps { + retry(2) { + sh '''#!/bin/bash + set -exo pipefail - date - ./test --gpu - ''' + date + docker pull gcr.io/kaggle-images/python:${PRETEST_TAG} + ./test --image gcr.io/kaggle-images/python:${PRETEST_TAG} + ''' + } + } + } + stage('Test on P100') { + agent { label 'ephemeral-linux-gpu' } + options { + timeout(time: 40, unit: 'MINUTES') + } + steps { + retry(2) { + sh '''#!/bin/bash + set -exo pipefail + + date + docker pull gcr.io/kaggle-private-byod/python:${PRETEST_TAG} + ./test --gpu --image gcr.io/kaggle-private-byod/python:${PRETEST_TAG} + ''' + } + } + } + stage('Test on T4x2') { + agent { label 'ephemeral-linux-gpu-t4x2' } + options { + timeout(time: 60, unit: 'MINUTES') + } + steps { + retry(2) { + sh '''#!/bin/bash + set -exo pipefail + + date + docker pull gcr.io/kaggle-private-byod/python:${PRETEST_TAG} + ./test --gpu --image gcr.io/kaggle-private-byod/python:${PRETEST_TAG} + ''' + } + } + } } } - stage('Push GPU Image') { - agent { label 'ephemeral-linux-gpu' } + stage('Label CPU/GPU Staging Images') { steps { - slackSend color: 'none', message: "*<${env.BUILD_URL}console|${JOB_NAME} pushing image>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL sh '''#!/bin/bash set -exo pipefail - date - ./push --gpu staging + gcloud container images 
add-tag gcr.io/kaggle-images/python:${PRETEST_TAG} gcr.io/kaggle-images/python:${STAGING_TAG} + gcloud container images add-tag gcr.io/kaggle-private-byod/python:${PRETEST_TAG} gcr.io/kaggle-private-byod/python:${STAGING_TAG} + # NOTE(b/336842777): TPUVM images are tested on an actual TPU VM outside this pipeline, so they are not auto-promoted to :staging tag. ''' } } - - stage('Package Versions') { - parallel { - stage('CPU Diff') { - steps { - slackSend color: 'none', message: "*<${env.BUILD_URL}console|${JOB_NAME} diff CPU image>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL - sh '''#!/bin/bash - ./diff - ''' - } - } - stage('GPU Diff') { - agent { label 'ephemeral-linux-gpu' } - steps { - slackSend color: 'none', message: "*<${env.BUILD_URL}console|${JOB_NAME} diff GPU image>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL - sh '''#!/bin/bash - ./diff --gpu - ''' - } - } - } - } } post { failure { - slackSend color: 'danger', message: "*<${env.BUILD_URL}console|${JOB_NAME} failed>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL + mattermostSend color: 'danger', message: "*<${env.BUILD_URL}console|${JOB_NAME} failed>* ${GIT_COMMIT_SUMMARY} @dockerops", channel: env.MATTERMOST_CHANNEL } success { - slackSend color: 'good', message: "*<${env.BUILD_URL}console|${JOB_NAME} passed>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL + mattermostSend color: 'good', message: "*<${env.BUILD_URL}console|${JOB_NAME} passed>* ${GIT_COMMIT_SUMMARY} @dockerops", channel: env.MATTERMOST_CHANNEL } aborted { - slackSend color: 'warning', message: "*<${env.BUILD_URL}console|${JOB_NAME} aborted>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL + mattermostSend color: 'warning', message: "*<${env.BUILD_URL}console|${JOB_NAME} aborted>* ${GIT_COMMIT_SUMMARY} @dockerops", channel: env.MATTERMOST_CHANNEL } } } diff --git a/README.md b/README.md index 453a4ca8..315e7db2 100644 --- a/README.md +++ b/README.md @@ -1,60 +1,71 @@ -**Kaggle Kernels** allow users to run scripts against our competitions and datasets without having to download data or set up their environment. Here's [an example](https://www.kaggle.com/devinanzelmo/d/devinanzelmo/dota-2-matches/setting-up-a-prediction-problem-dota-2): +# docker-python -![example script](http://i.imgur.com/yrWycNA.png) +[Kaggle Notebooks](https://www.kaggle.com/notebooks) allow users to run a Python Notebook in the cloud against our competitions and datasets without having to download data or set up their environment. -This is the Dockerfile (etc.) used for building the image that runs python scripts on Kaggle. [Here's](https://gcr.io/kaggle-images/python) the Docker image on Google Container Registry. +This repository includes the [Dockerfile](Dockerfile.tmpl) for building the CPU-only and GPU image that runs Python Notebooks on Kaggle. -## Getting started +Our Python Docker images are stored on the Google Container Registry at: -To get started with this image, read our [guide](http://blog.kaggle.com/2016/02/05/how-to-get-started-with-data-science-in-containers/) to using it yourself, or browse [Kaggle Kernels](https://www.kaggle.com/kernels) for ideas. +* CPU-only: [gcr.io/kaggle-images/python](https://gcr.io/kaggle-images/python) +* GPU: [gcr.io/kaggle-gpu-images/python](https://gcr.io/kaggle-gpu-images/python) -## Requesting new features +## Requesting new packages -**We welcome pull requests** if there are any packages you'd like to add! +First, evaluate whether installing the package yourself in your own notebooks suits your needs. 
See [guide](https://github.com/Kaggle/docker-python/wiki/Missing-Packages). -We can merge your request quickly if you check that it builds correctly. Here's how to do that. +If you the first step above doesn't work for your use case, [open an issue](https://github.com/Kaggle/docker-python/issues/new) or a [pull request](https://github.com/Kaggle/docker-python/pulls). -Start by running this image on your system: +## Opening a pull request -``` -me@my-computer:/home$ docker run --rm -it gcr.io/kaggle-images/python -root@d72b81a003e1:/# -``` +1. Edit [kaggle_requirements.txt](kaggle_requirements.txt). +1. Follow the instructions below to build a new image. +1. Add tests for your new package. See this [example](https://github.com/Kaggle/docker-python/blob/main/tests/test_fastai.py). +1. Follow the instructions below to test the new image. +1. Open a PR on this repo and you are all set! -Then follow the package's installation instructions for a Linux system. It could be as simple as installing via Pip: +## Building a new image -``` -root@d72b81a003e1:/# pip install coolpackage -Collecting coolpackage -[...etc...] +```sh +./build ``` -Once that's done, check that you can import it correctly. (Sometimes, if a package is missing a dependency, it throws an error when you try to import it.) +Flags: -``` -root@d72b81a003e1:/# python -Python 3.5.2 |Anaconda 4.2.0 (64-bit)| (default, Jul 2 2016, 17:53:06) -[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)] on linux -Type "help", "copyright", "credits" or "license" for more information. ->>> import coolpackage ->>> -``` +* `--gpu` to build an image for GPU. +* `--use-cache` for faster iterative builds. -Once that's working, add the necessary lines to our [Dockerfile](https://github.com/Kaggle/docker-python/blob/master/Dockerfile). (In this case, that would mean adding `pip install coolpackage` to the last section.) +## Testing a new image -Next run the build: +A suite of tests can be found under the `/tests` folder. You can run the test using this command: -``` -./build +```sh +./test ``` -Finally run the tests: +Flags: -``` -./test -``` +* `--gpu` to test the GPU image. +* `--pattern test_keras.py` or `-p test_keras.py` to run a single test +* `--image gcr.io/kaggle-images/python:ci-pretest` or `-i gcr.io/kaggle-images/python:ci-pretest` to test against a specific image -Then submit your pull request, and you're all set! +## Running the image +For the CPU-only image: +```sh +# Run the image built locally: +docker run --rm -it kaggle/python-build /bin/bash +# Run the pre-built image from gcr.io +docker run --rm -it gcr.io/kaggle-images/python /bin/bash +``` + +For the GPU image: + +```sh +# Run the image built locally: +docker run --runtime nvidia --rm -it kaggle/python-gpu-build /bin/bash +# Run the image pre-built image from gcr.io +docker run --runtime nvidia --rm -it gcr.io/kaggle-gpu-images/python /bin/bash +``` +To ensure your container can access the GPU, follow the instructions posted [here](https://github.com/Kaggle/docker-python/issues/361#issuecomment-448093930). diff --git a/build b/build index f2a7af45..83bbe577 100755 --- a/build +++ b/build @@ -7,14 +7,16 @@ Usage: $0 [OPTIONS] Build a new Python Docker image. Options: - -g, --gpu Build an image with GPU support. - -c, --use-cache Use layer cache when building a new image. + -g, --gpu Build an image with GPU support. + -c, --use-cache Use layer cache when building a new image. 
EOF } CACHE_FLAG='--no-cache' DOCKERFILE='Dockerfile' +ACCELERATOR='none' IMAGE_TAG='kaggle/python-build' +BUILD_ARGS='' while :; do case "$1" in @@ -25,6 +27,7 @@ while :; do -g|--gpu) IMAGE_TAG='kaggle/python-gpu-build' DOCKERFILE='gpu.Dockerfile' + ACCELERATOR='gpu' ;; -c|--use-cache) CACHE_FLAG='' @@ -41,9 +44,26 @@ while :; do shift done +BUILD_ARGS+=" --build-arg GIT_COMMIT=$(git rev-parse HEAD)" +BUILD_ARGS+=" --build-arg BUILD_DATE=$(date '+%Y%m%d-%H%M%S')" + readonly CACHE_FLAG readonly DOCKERFILE +readonly ACCELERATOR readonly IMAGE_TAG +readonly BUILD_ARGS + +SRCDIR=$(dirname "${BASH_SOURCE[0]}") +DOCKERFILE_OUTDIR="${SRCDIR}/.generated" +mkdir -p $DOCKERFILE_OUTDIR +DOCKERFILE_PATH="$DOCKERFILE_OUTDIR/$DOCKERFILE" + +# Generate Dockerfile from template. +echo "Generating Dockerfile from template..." +docker run --rm -v $PWD:/input:ro gcr.io/kaggle-images/go-renderizer:latest --ACCELERATOR=$ACCELERATOR /input/Dockerfile.tmpl > $DOCKERFILE_PATH +echo "==================== $DOCKERFILE START ====================" +cat $DOCKERFILE_PATH +echo "==================== $DOCKERFILE END ====================" set -x -docker build --rm --pull $CACHE_FLAG -t "$IMAGE_TAG" -f "$DOCKERFILE" . +docker build --rm --pull $CACHE_FLAG -t "$IMAGE_TAG" -f "$DOCKERFILE_PATH" $BUILD_ARGS . diff --git a/clean-layer.sh b/clean-layer.sh index fd14c1ee..9a50e7bf 100755 --- a/clean-layer.sh +++ b/clean-layer.sh @@ -10,13 +10,11 @@ set -e set -x -# Delete files that pip caches when installing a package. -rm -rf /root/.cache/pip/* # Delete old downloaded archive files apt-get autoremove -y # Delete downloaded archive files apt-get clean +# Ensures the current working directory won't be deleted +cd /usr/local/src/ # Delete source files used for building binaries -rm -rf /usr/local/src/* -# Delete conda downloaded tarballs -conda clean -y --tarballs +rm -rf /usr/local/src/* \ No newline at end of file diff --git a/dev.Dockerfile b/dev.Dockerfile new file mode 100644 index 00000000..4a661136 --- /dev/null +++ b/dev.Dockerfile @@ -0,0 +1,23 @@ +# This Dockerfile builds an image to quickly iterate on the kaggle libraries. +# +# Create a new image with the latest kaggle librairies using the latest image +# built by CI with a successful test run as the base. +# +# Usage: +# cd path/to/docker-python +# docker build -t kaggle/python-dev -f dev.Dockerfile . +# +# # you can run a container using the image using: +# docker run -it --rm kaggle/python-dev /bin/bash +# +# # you can run the tests against this new image using: +# ./test -i kaggle/python-dev -p test_user_secrets.py +# +FROM gcr.io/kaggle-images/python:staging + +ADD patches/kaggle_gcp.py /root/.local/lib/python3.7/site-packages/kaggle_gcp.py +ADD patches/kaggle_secrets.py /root/.local/lib/python3.7/site-packages/kaggle_secrets.py +ADD patches/kaggle_session.py /root/.local/lib/python3.7/site-packages/kaggle_session.py +ADD patches/kaggle_web_client.py /root/.local/lib/python3.7/site-packages/kaggle_web_client.py +ADD patches/kaggle_datasets.py /root/.local/lib/python3.7/site-packages/kaggle_datasets.py +ADD patches/sitecustomize.py /root/.local/lib/python3.7/site-packages/sitecustomize.py \ No newline at end of file diff --git a/diff b/diff index 6e1be902..c8251703 100755 --- a/diff +++ b/diff @@ -4,16 +4,23 @@ set -e usage() { cat << EOF Usage: $0 [OPTIONS] -Compare the locally built Docker image package versions against the prod image. +Compare a given Docker image package versions against the prod image. Options: -g, --gpu Compare GPU images. 
+ -b, --base The base image to diff against. + -t, --target The image to diff against the base image. + Default is the locally built image. + -p, --package Only show diff for this package and its dependencies. EOF } BASE_IMAGE_TAG='gcr.io/kaggle-images/python:latest' +BASE_IMAGE_TAG_OVERRIDE='' TARGET_IMAGE_TAG='kaggle/python-build' +TARGET_IMAGE_TAG_OVERRIDE='' +PACKAGE_NAME='' while :; do case "$1" in @@ -25,6 +32,37 @@ while :; do BASE_IMAGE_TAG='gcr.io/kaggle-private-byod/python:latest' TARGET_IMAGE_TAG='kaggle/python-gpu-build' ;; + -x|--tpu) + BASE_IMAGE_TAG='gcr.io/kaggle-private-byod/python-tpuvm:latest' + TARGET_IMAGE_TAG='kaggle/python-tpuvm-build' + ;; + -b|--base) + if [[ -z "$2" ]]; then + usage + printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 + exit + fi + BASE_IMAGE_TAG_OVERRIDE="$2" + shift # skip the flag value + ;; + -t|--target) + if [[ -z "$2" ]]; then + usage + printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 + exit + fi + TARGET_IMAGE_TAG_OVERRIDE="$2" + shift # skip the flag value + ;; + -p|--package) + if [[ -z "$2" ]]; then + usage + printf 'ERROR: No PACKAGE specified after the %s flag.\n' "$1" >&2 + exit + fi + PACKAGE_NAME="$2" + shift # skip the flag value + ;; -?*) usage printf 'ERROR: Unknown option: %s\n' "$1" >&2 @@ -37,19 +75,36 @@ while :; do shift done +if [[ -n "$BASE_IMAGE_TAG_OVERRIDE" ]]; then + BASE_IMAGE_TAG="$BASE_IMAGE_TAG_OVERRIDE" +fi + +if [[ -n "$TARGET_IMAGE_TAG_OVERRIDE" ]]; then + TARGET_IMAGE_TAG="$TARGET_IMAGE_TAG_OVERRIDE" +fi + readonly BASE_IMAGE_TAG readonly TARGET_IMAGE_TAG echo "Base: $BASE_IMAGE_TAG" echo "Target: $TARGET_IMAGE_TAG" -docker pull "$BASE_IMAGE_TAG" +if [[ "$BASE_IMAGE_TAG" == "gcr.io/"* ]]; then + docker pull "$BASE_IMAGE_TAG" +fi + + +if [[ -n "$PACKAGE_NAME" ]]; then + echo "Package: $PACKAGE_NAME" + CMDS=("python /tools/pip_list_versions.py $PACKAGE_NAME | sort") +else + CMDS=("pip list --format=freeze" 'cat /etc/os-release | grep -oP "PRETTY_NAME=\"\K([^\"]*)"' "uname -r" "dpkg --list | awk '{print \$2\"==\"\$3}'" "printenv | sort") +fi -CMDS=('dpkg-query --show -f "${Package}==${Version}\n"' 'pip freeze') for cmd in "${CMDS[@]}"; do echo "== Comparing $cmd ==" diff --suppress-common-lines --side-by-side \ - <(docker run --rm "$BASE_IMAGE_TAG" $cmd) \ - <(docker run --rm "$TARGET_IMAGE_TAG" $cmd) \ + <(docker run -v $PWD/tools:/tools --entrypoint bash --rm "$BASE_IMAGE_TAG" -c "$cmd") \ + <(docker run -v $PWD/tools:/tools --entrypoint bash --rm "$TARGET_IMAGE_TAG" -c "$cmd") \ && echo 'No diff' || true done diff --git a/gpu.Dockerfile b/gpu.Dockerfile deleted file mode 100644 index 41ec73b3..00000000 --- a/gpu.Dockerfile +++ /dev/null @@ -1,56 +0,0 @@ -FROM nvidia/cuda:9.1-cudnn7-devel-ubuntu16.04 AS nvidia -FROM gcr.io/kaggle-images/python-tensorflow-whl:1.11.0-py36 as tensorflow_whl -FROM gcr.io/kaggle-images/python:staging - -ADD clean-layer.sh /tmp/clean-layer.sh - -# Cuda support -COPY --from=nvidia /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/ -COPY --from=nvidia /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/ -COPY --from=nvidia /etc/apt/trusted.gpg /etc/apt/trusted.gpg.d/cuda.gpg - -# Ensure the cuda libraries are compatible with the custom Tensorflow wheels. -# TODO(b/120050292): Use templating to keep in sync or COPY installed binaries from it. 
-ENV CUDA_VERSION=9.1.85 -ENV CUDA_PKG_VERSION=9-1=$CUDA_VERSION-1 -LABEL com.nvidia.volumes.needed="nvidia_driver" -LABEL com.nvidia.cuda.version="${CUDA_VERSION}" -ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -# The stub is useful to us both for built-time linking and run-time linking, on CPU-only systems. -# When intended to be used with actual GPUs, make sure to (besides providing access to the host -# CUDA user libraries, either manually or through the use of nvidia-docker) exclude them. One -# convenient way to do so is to obscure its contents by a bind mount: -# docker run .... -v /non-existing-directory:/usr/local/cuda/lib64/stubs:ro ... -ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs" -ENV NVIDIA_VISIBLE_DEVICES=all -ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility -ENV NVIDIA_REQUIRE_CUDA="cuda>=9.0" -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-cupti-$CUDA_PKG_VERSION \ - cuda-cudart-$CUDA_PKG_VERSION \ - cuda-libraries-$CUDA_PKG_VERSION \ - cuda-libraries-dev-$CUDA_PKG_VERSION \ - cuda-nvml-dev-$CUDA_PKG_VERSION \ - cuda-minimal-build-$CUDA_PKG_VERSION \ - cuda-command-line-tools-$CUDA_PKG_VERSION \ - libcudnn7=7.2.1.38-1+cuda9.0 \ - libcudnn7-dev=7.2.1.38-1+cuda9.0 \ - libnccl2=2.2.12-1+cuda9.1 \ - libnccl-dev=2.2.12-1+cuda9.1 && \ - ln -s /usr/local/cuda-9.1 /usr/local/cuda && \ - ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \ - /tmp/clean-layer.sh - -# Reinstall packages with a separate version for GPU support -# Tensorflow -COPY --from=tensorflow_whl /tmp/tensorflow_gpu/*.whl /tmp/tensorflow_gpu/ -RUN pip uninstall -y tensorflow && \ - pip install /tmp/tensorflow_gpu/tensorflow*.whl && \ - rm -rf /tmp/tensorflow_gpu && \ - conda uninstall -y pytorch-cpu torchvision-cpu && \ - conda install -y pytorch torchvision -c pytorch && \ - /tmp/clean-layer.sh - -# Install GPU-only packages -RUN pip install pycuda && \ - /tmp/clean-layer.sh diff --git a/kaggle_requirements.txt b/kaggle_requirements.txt new file mode 100644 index 00000000..30e0683f --- /dev/null +++ b/kaggle_requirements.txt @@ -0,0 +1,128 @@ +# Please keep this in alphabetical order +Boruta +Cartopy +ImageHash +Janome +PyArabic +PyUpSet +Pympler +Rtree +shapely +SimpleITK +TPOT +Wand +bayesian-optimization +boto3 +catboost +category-encoders +cesium +comm +cytoolz +# Older versions of datasets fail with "Loading a dataset cached in a LocalFileSystem is not supported" +# https://stackoverflow.com/questions/77433096/notimplementederror-loading-a-dataset-cached-in-a-localfilesystem-is-not-suppor +datasets>=2.14.6 +deap +dipy +docker +easyocr +emoji +fasttext +featuretools +fiona +fury +fuzzywuzzy +geojson +gensim +# b/443054743,b/455550872 +google-adk[a2a,eval]>=1.21.0 +google-cloud-aiplatform +google-cloud-videointelligence +google-cloud-vision +google-genai +gpxpy +h2o +haversine +hep-ml +igraph +ipympl +ipywidgets==8.1.5 +isoweek +jedi +# jitler 0.11.1 breaks simulation image +jiter==0.10.0 +# b/276358430: fix Jupyter lsp freezing up the jupyter server +jupyter-lsp==1.5.1 +# b/333854354: pin jupyter-server to version 2.12.5; later versions break LSP (b/333854354) +jupyter_server==2.12.5 +jupyter_server_proxy +jupyterlab +jupyterlab-lsp +kaggle>=1.8.3 +kaggle-environments +kagglehub[pandas-datasets,hf-datasets,signing]>=0.4.2 +keras-cv +keras-nlp +keras-tuner +kornia +langid +libpysal +lime +line_profiler +mamba +matplotlib +mlcrate +mne +mpld3 +# b/274619697: learntools requires a 
specific nbconvert right now +nbconvert==6.4.5 +nbdev +nilearn +olefile +onnx +openslide-bin +openslide-python +optuna +pandas-profiling +pandasql +papermill +path +path.py +pdf2image +plotly-express +pudb +pyLDAvis +pycryptodome +pydicom +pyemd +pyexcel-ods +pymongo +pypdf +pytesseract +python-lsp-server +pytorch-ignite +pytorch-lightning +qgrid +qtconsole +ray +rgf-python +s3fs +scikit-learn +# Scikit-learn accelerated library for x86 +scikit-learn-intelex>=2023.0.1 +scikit-multilearn +scikit-optimize +scikit-plot +scikit-surprise +git+https://github.com/facebookresearch/segment-anything.git +squarify +tensorflow-io +# Must be compatible with torch version: https://github.com/meta-pytorch/torchcodec?tab=readme-ov-file#installing-torchcodec +torchcodec==0.9 +torchinfo +torchmetrics +torchtune +transformers>=5.0.0 +vtk +wavio +xvfbwrapper +ydata-profiling diff --git a/kaggle_tools_update.Dockerfile b/kaggle_tools_update.Dockerfile deleted file mode 100644 index 8f7fac20..00000000 --- a/kaggle_tools_update.Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -# This Dockerfile is a temporary solution until we -# resolved the broken main build. - -# This Dockerfile creates a new image based on our -# current published python image with the latest -# version of the LearnTools library to allow us -# to release new Learn content. It also configures -# pip to work out-of-the-box when internet access -# is enabled. - -# Usage: -# docker rmi gcr.io/kaggle-images/python:pinned -# docker build --rm -t kaggle/python-build -f kaggle_tools_update.Dockerfile . -# ./test -# ./push (if tests are passing) - -# Pull the last build manually tagged as "pinned". -FROM gcr.io/kaggle-images/python:pinned - -RUN pip install --upgrade git+https://github.com/Kaggle/learntools \ No newline at end of file diff --git a/patches/imagemagick-policy.xml b/patches/imagemagick-policy.xml new file mode 100644 index 00000000..981fb59f --- /dev/null +++ b/patches/imagemagick-policy.xml @@ -0,0 +1,10 @@ + + + + + +]> + \ No newline at end of file diff --git a/patches/kaggle_datasets.py b/patches/kaggle_datasets.py new file mode 100644 index 00000000..e60db2ef --- /dev/null +++ b/patches/kaggle_datasets.py @@ -0,0 +1,42 @@ +import os +import sys +from os import listdir +from os.path import isdir, join +from kaggle_web_client import KaggleWebClient + +_KAGGLE_TPU_NAME_ENV_VAR_NAME = 'TPU_NAME' +_KAGGLE_TPUVM_NAME_ENV_VAR_NAME = 'ISTPUVM' +_KAGGLE_INPUT_DIR = '/kaggle/input' + +class KaggleDatasets: + GET_GCS_PATH_ENDPOINT = '/requests/CopyDatasetVersionToKnownGcsBucketRequest' + TIMEOUT_SECS = 600 + + # Integration types for GCS + AUTO_ML = 1 + TPU = 2 + + def __init__(self): + self.web_client = KaggleWebClient() + self.has_tpu = os.getenv(_KAGGLE_TPU_NAME_ENV_VAR_NAME) is not None + self.has_tpuvm = os.getenv(_KAGGLE_TPUVM_NAME_ENV_VAR_NAME) is not None + + def get_gcs_path(self, dataset_dir: str = None) -> str: + if self.has_tpuvm: + if dataset_dir is None: + onlydirs = [f for f in listdir(_KAGGLE_INPUT_DIR) if isdir(join(_KAGGLE_INPUT_DIR, f))] + if len(onlydirs) == 1: + dataset_dir = onlydirs[0] + else: + raise Exception("Could not infer dataset_dir. 
dataset_dir can only be inferred if there is exactly 1 Kaggle dataset attached.") + dataset = join(_KAGGLE_INPUT_DIR, dataset_dir) + print("get_gcs_path is not required on TPU VMs which can directly use Kaggle datasets, using path: " + dataset, file=sys.stderr) + return dataset + + integration_type = self.TPU if self.has_tpu else self.AUTO_ML + data = { + 'MountSlug': dataset_dir, + 'IntegrationType': integration_type, + } + result = self.web_client.make_post_request(data, self.GET_GCS_PATH_ENDPOINT, self.TIMEOUT_SECS) + return result['destinationBucket'] diff --git a/patches/kaggle_gcp.py b/patches/kaggle_gcp.py new file mode 100644 index 00000000..4cb98858 --- /dev/null +++ b/patches/kaggle_gcp.py @@ -0,0 +1,362 @@ +import os +import inspect +import logging +from google.auth import credentials, environment_vars +from google.auth.exceptions import RefreshError +from google.api_core.gapic_v1.client_info import ClientInfo +from google.cloud import bigquery +from google.cloud.exceptions import Forbidden +from google.cloud.bigquery._http import Connection +from kaggle_secrets import GcpTarget, UserSecretsClient + +KAGGLE_GCP_CLIENT_USER_AGENT="kaggle-gcp-client/1.0" + +def get_integrations(): + kernel_integrations_var = os.getenv("KAGGLE_KERNEL_INTEGRATIONS") + kernel_integrations = KernelIntegrations() + if kernel_integrations_var is None: + return kernel_integrations + for integration in kernel_integrations_var.split(':'): + try: + target = GcpTarget[integration.upper()] + kernel_integrations.add_integration(target) + except KeyError as e: + logging.debug(f"Unknown integration target: {integration.upper()}") + return kernel_integrations + + +class KernelIntegrations(): + def __init__(self): + self.integrations = {} + + def add_integration(self, target): + self.integrations[target] = True + + def has_integration(self, target): + return target in self.integrations + + def has_bigquery(self): + return GcpTarget.BIGQUERY in self.integrations + + def has_gcs(self): + return GcpTarget.GCS in self.integrations + + def has_cloudai(self): + return GcpTarget.CLOUDAI in self.integrations or \ + GcpTarget.AUTOML in self.integrations + +class KaggleKernelCredentials(credentials.Credentials): + """Custom Credentials used to authenticate using the Kernel's connected OAuth account. + Example usage: + client = bigquery.Client(project='ANOTHER_PROJECT', + credentials=KaggleKernelCredentials()) + """ + def __init__(self, target=GcpTarget.BIGQUERY): + super().__init__() + self.target = target + + def refresh(self, request): + try: + client = UserSecretsClient() + if self.target == GcpTarget.BIGQUERY: + self.token, self.expiry = client.get_bigquery_access_token() + elif self.target == GcpTarget.GCS: + self.token, self.expiry = client._get_gcs_access_token() + elif self.target == GcpTarget.CLOUDAI: + self.token, self.expiry = client._get_cloudai_access_token() + except ConnectionError as e: + logging.error(f"Connection error trying to refresh access token: {e}") + print("There was a connection error trying to fetch the access token. 
" + f"Please ensure internet is on in order to use the {self.target.service} Integration.") + raise RefreshError('Unable to refresh access token due to connection error.') from e + except Exception as e: + logging.error(f"Error trying to refresh access token: {e}") + if (not get_integrations().has_integration(self.target)): + logging.error(f"No {self.target.service} integration found.") + print( + f"Please ensure you have selected a {self.target.service} account in the Notebook Add-ons menu.") + raise RefreshError('Unable to refresh access token.') from e + +class KaggleKernelWithProjetCredentials(KaggleKernelCredentials): + """ Wrapper Kaggle Credentials with quota_project_id. + """ + def __init__(self, parentCredential=None, quota_project_id=None): + super().__init__(target=parentCredential.target) + self._quota_project_id=quota_project_id + +class _DataProxyConnection(Connection): + """Custom Connection class used to proxy the BigQuery client to Kaggle's data proxy.""" + + def __init__(self, client, **kwargs): + super().__init__(client, **kwargs) + self.extra_headers["X-KAGGLE-PROXY-DATA"] = os.getenv( + "KAGGLE_DATA_PROXY_TOKEN") + + def api_request(self, *args, **kwargs): + """Wrap Connection.api_request in order to handle errors gracefully. + """ + try: + return super().api_request(*args, **kwargs) + except Forbidden as e: + msg = ("Permission denied using Kaggle's public BigQuery integration. " + "Did you mean to select a BigQuery account in the Notebook Add-ons menu?") + print(msg) + logging.info(msg) + raise e + + +class PublicBigqueryClient(bigquery.client.Client): + """A modified BigQuery client that routes requests using Kaggle's Data Proxy to provide free access to Public Datasets. + Example usage: + from kaggle import PublicBigqueryClient + client = PublicBigqueryClient() + """ + + def __init__(self, *args, **kwargs): + data_proxy_project = os.getenv("KAGGLE_DATA_PROXY_PROJECT") + default_api_endpoint = os.getenv("KAGGLE_DATA_PROXY_URL") + anon_credentials = credentials.AnonymousCredentials() + anon_credentials.refresh = lambda *args: None + super().__init__( + project=data_proxy_project, credentials=anon_credentials, *args, **kwargs + ) + # TODO: Remove this once https://github.com/googleapis/google-cloud-python/issues/7122 is implemented. + self._connection = _DataProxyConnection(self, api_endpoint=default_api_endpoint) + +def has_been_monkeypatched(method): + return "kaggle_gcp" in inspect.getsourcefile(method) + +def is_user_secrets_token_set(): + return "KAGGLE_USER_SECRETS_TOKEN" in os.environ + +def is_proxy_token_set(): + return "KAGGLE_DATA_PROXY_TOKEN" in os.environ + +def init_bigquery(): + from google.cloud import bigquery + + if not (is_proxy_token_set() or is_user_secrets_token_set()): + return bigquery + + # If this Notebook has bigquery integration on startup, preload the Kaggle Credentials + # object for magics to work. + if get_integrations().has_bigquery(): + from google.cloud.bigquery import magics + magics.context.credentials = KaggleKernelCredentials() + + def monkeypatch_bq(bq_client, *args, **kwargs): + from kaggle_gcp import get_integrations, PublicBigqueryClient, KaggleKernelCredentials + specified_credentials = kwargs.get('credentials') + has_bigquery = get_integrations().has_bigquery() + # Prioritize passed in project id, but if it is missing look for env var. + arg_project = kwargs.get('project') + explicit_project_id = arg_project or os.environ.get(environment_vars.PROJECT) + # This is a hack to get around the bug in google-cloud library. 
+ # Remove these two lines once this is resolved: + # https://github.com/googleapis/google-cloud-python/issues/8108 + if explicit_project_id: + logging.info(f"Explicit project set to {explicit_project_id}") + kwargs['project'] = explicit_project_id + if explicit_project_id is None and specified_credentials is None and not has_bigquery: + msg = "Using Kaggle's public dataset BigQuery integration." + logging.info(msg) + print(msg) + return PublicBigqueryClient(*args, **kwargs) + else: + if specified_credentials is None: + logging.info("No credentials specified, using KaggleKernelCredentials.") + kwargs['credentials'] = KaggleKernelCredentials() + if (not has_bigquery): + logging.info("No bigquery integration found, creating client anyways.") + print('Please ensure you have selected a BigQuery ' + 'account in the Notebook Add-ons menu.') + if explicit_project_id is None: + logging.info("No project specified while using the unmodified client.") + print('Please ensure you specify a project id when creating the client' + ' in order to use your BigQuery account.') + kwargs['client_info'] = set_kaggle_user_agent(kwargs.get('client_info')) + return bq_client(*args, **kwargs) + + # Monkey patches BigQuery client creation to use proxy or user-connected GCP account. + # Deprecated in favor of Kaggle.DataProxyClient(). + # TODO: Remove this once uses have migrated to that new interface. + bq_client = bigquery.Client + if (not has_been_monkeypatched(bigquery.Client)): + bigquery.Client = lambda *args, **kwargs: monkeypatch_bq( + bq_client, *args, **kwargs) + return bigquery + +# Monkey patch for aiplatform init +# eg +# from google.cloud import aiplatform +# aiplatform.init(args) +def monkeypatch_aiplatform_init(aiplatform_klass, kaggle_kernel_credentials): + aiplatform_init = aiplatform_klass.init + def patched_init(*args, **kwargs): + specified_credentials = kwargs.get('credentials') + if specified_credentials is None: + logging.info("No credentials specified, using KaggleKernelCredentials.") + kwargs['credentials'] = kaggle_kernel_credentials + return aiplatform_init(*args, **kwargs) + + if (not has_been_monkeypatched(aiplatform_klass.init)): + aiplatform_klass.init = patched_init + logging.info("aiplatform.init patched") + +def monkeypatch_client(client_klass, kaggle_kernel_credentials): + client_init = client_klass.__init__ + def patched_init(self, *args, **kwargs): + specified_credentials = kwargs.get('credentials') + if specified_credentials is None: + logging.info("No credentials specified, using KaggleKernelCredentials.") + # Some GCP services demand the billing and target project must be the same. 
+ # To avoid using default service account based credential as caller credential + # user need to provide ClientOptions with quota_project_id: + # srv.Client(client_options=client_options.ClientOptions(quota_project_id="YOUR PROJECT")) + client_options=kwargs.get('client_options') + if client_options != None and client_options.quota_project_id != None: + kwargs['credentials'] = KaggleKernelWithProjetCredentials( + parentCredential = kaggle_kernel_credentials, + quota_project_id = client_options.quota_project_id) + else: + kwargs['credentials'] = kaggle_kernel_credentials + + kwargs['client_info'] = set_kaggle_user_agent(kwargs.get('client_info')) + return client_init(self, *args, **kwargs) + + if (not has_been_monkeypatched(client_klass.__init__)): + client_klass.__init__ = patched_init + logging.info(f"Client patched: {client_klass}") + +def set_kaggle_user_agent(client_info: ClientInfo): + # Add kaggle client user agent in order to attribute usage. + if client_info is None: + client_info = ClientInfo(user_agent=KAGGLE_GCP_CLIENT_USER_AGENT) + else: + client_info.user_agent = KAGGLE_GCP_CLIENT_USER_AGENT + return client_info + +def init_gcs(): + from google.cloud import storage + if not is_user_secrets_token_set(): + return storage + + from kaggle_gcp import get_integrations + if not get_integrations().has_gcs(): + return storage + + from kaggle_secrets import GcpTarget + from kaggle_gcp import KaggleKernelCredentials + monkeypatch_client( + storage.Client, + KaggleKernelCredentials(target=GcpTarget.GCS)) + return storage + +def init_translation_v2(): + from google.cloud import translate_v2 + if not is_user_secrets_token_set(): + return translate_v2 + + from kaggle_gcp import get_integrations + if not get_integrations().has_cloudai(): + return translate_v2 + from kaggle_secrets import GcpTarget + kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) + monkeypatch_client(translate_v2.Client, kernel_credentials) + return translate_v2 + +def init_translation_v3(): + # Translate v3 exposes different client than translate v2. 
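+ # As with v2 above, the patch is only applied when a user secrets token is
+ # present and the Cloud AI integration is attached; otherwise the stock
+ # translate_v3 module is returned unchanged.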
+ from google.cloud import translate_v3 + if not is_user_secrets_token_set(): + return translate_v3 + + from kaggle_gcp import get_integrations + if not get_integrations().has_cloudai(): + return translate_v3 + from kaggle_secrets import GcpTarget + kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) + monkeypatch_client(translate_v3.TranslationServiceClient, kernel_credentials) + return translate_v3 + +def init_natural_language(): + from google.cloud import language + if not is_user_secrets_token_set(): + return language + + from kaggle_gcp import get_integrations + if not get_integrations().has_cloudai(): + return language + + from kaggle_secrets import GcpTarget + kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) + monkeypatch_client(language.LanguageServiceClient, kernel_credentials) + monkeypatch_client(language.LanguageServiceAsyncClient, kernel_credentials) + return language + +def init_ucaip(): + from google.cloud import aiplatform + if not is_user_secrets_token_set(): + return + + from kaggle_gcp import get_integrations + if not get_integrations().has_cloudai(): + return + + from kaggle_secrets import GcpTarget + from kaggle_gcp import KaggleKernelCredentials + kaggle_kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) + + # Patch the ucaip init method, this flows down to all ucaip services + monkeypatch_aiplatform_init(aiplatform, kaggle_kernel_credentials) + +def init_video_intelligence(): + from google.cloud import videointelligence + if not is_user_secrets_token_set(): + return videointelligence + + from kaggle_gcp import get_integrations + if not get_integrations().has_cloudai(): + return videointelligence + + from kaggle_secrets import GcpTarget + kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) + monkeypatch_client( + videointelligence.VideoIntelligenceServiceClient, + kernel_credentials) + monkeypatch_client( + videointelligence.VideoIntelligenceServiceAsyncClient, + kernel_credentials) + return videointelligence + +def init_vision(): + from google.cloud import vision + if not is_user_secrets_token_set(): + return vision + + from kaggle_gcp import get_integrations + if not get_integrations().has_cloudai(): + return vision + + from kaggle_secrets import GcpTarget + kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) + monkeypatch_client(vision.ImageAnnotatorClient, kernel_credentials) + monkeypatch_client(vision.ImageAnnotatorAsyncClient, kernel_credentials) + return vision + +def init(): + init_bigquery() + init_gcs() + init_translation_v2() + init_translation_v3() + init_natural_language() + init_video_intelligence() + init_vision() + init_ucaip() + +# We need to initialize the monkeypatching of the client libraries +# here since there is a circular dependency between our import hook version +# google.cloud.* and kaggle_gcp. By calling init here, we guarantee +# that regardless of the original import that caused google.cloud.* to be +# loaded, the monkeypatching will be done. 
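+ # For example (assuming the GCS integration is attached to the notebook), a
+ # plain
+ #   from google.cloud import storage
+ #   client = storage.Client()
+ # ends up using KaggleKernelCredentials without importing kaggle_gcp directly.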
+init() \ No newline at end of file diff --git a/patches/kaggle_module_resolver.py b/patches/kaggle_module_resolver.py new file mode 100644 index 00000000..430cb980 --- /dev/null +++ b/patches/kaggle_module_resolver.py @@ -0,0 +1,22 @@ +import os +import re +import kagglehub + +from tensorflow_hub import resolver + +short_url_pattern = re.compile(r"https?://([a-z]+\.)?kaggle.com/models/(?P[^\\/]+)/(?P[^\\/]+)/(?P[^\\/]+)/(?P[^\\/]+)/(?P[0-9]+)$") +long_url_pattern = re.compile(r"https?://([a-z]+\.)?kaggle.com/models/(?P[^\\/]+)/(?P[^\\/]+)/frameworks/(?P[^\\/]+)/variations/(?P[^\\/]+)/versions/(?P[0-9]+)$") + +def _is_on_kaggle_notebook(): + return os.getenv("KAGGLE_KERNEL_RUN_TYPE") != None and os.getenv("KAGGLE_USER_SECRETS_TOKEN") != None + +def _is_kaggle_handle(handle): + return long_url_pattern.match(handle) != None or short_url_pattern.match(handle) != None + +class KaggleFileResolver(resolver.HttpResolverBase): + def is_supported(self, handle): + return _is_on_kaggle_notebook() and _is_kaggle_handle(handle) + + def __call__(self, handle): + m = long_url_pattern.match(handle) or short_url_pattern.match(handle) + return kagglehub.model_download(f"{m.group('owner')}/{m.group('model')}/{m.group('framework').lower()}/{m.group('variation')}/{m.group('version')}") diff --git a/patches/kaggle_secrets.py b/patches/kaggle_secrets.py new file mode 100644 index 00000000..a177c171 --- /dev/null +++ b/patches/kaggle_secrets.py @@ -0,0 +1,150 @@ +"""UserSecret client classes. +This library adds support for communicating with the UserSecrets service, +currently used for retrieving an access token for supported integrations +(ie. BigQuery). +""" + +import os +from datetime import datetime, timedelta +from enum import Enum, unique +import subprocess +from typing import Optional, Tuple +from kaggle_web_client import KaggleWebClient +from kaggle_web_client import (CredentialError, BackendError) + +class ValidationError(Exception): + pass + +class NotFoundError(Exception): + pass + +@unique +class GcpTarget(Enum): + """Enum class to store GCP targets.""" + BIGQUERY = (1, "BigQuery") + GCS = (2, "Google Cloud Storage") + # Old name, should remove later. + AUTOML = (3, "Cloud AutoML") + CLOUDAI = (3, "Google Cloud AI Platform") + + def __init__(self, target, service): + self._target = target + self._service = service + + @property + def target(self): + return self._target + + @property + def service(self): + return self._service + + +class UserSecretsClient(): + GET_USER_SECRET_ENDPOINT = '/requests/GetUserSecretRequest' + GET_USER_SECRET_BY_LABEL_ENDPOINT = '/requests/GetUserSecretByLabelRequest' + + def __init__(self): + self.web_client = KaggleWebClient() + + def get_secret(self, label) -> str: + """Retrieves a user secret value by its label. + + This returns the value of the secret with the given label, + if it attached to the current kernel. + Example usage: + client = UserSecretsClient() + secret = client.get_secret('my_db_password') + """ + if label is None or len(label) == 0: + raise ValidationError("Label must be non-empty.") + request_body = { + 'Label': label, + } + response_json = self.web_client.make_post_request(request_body, self.GET_USER_SECRET_BY_LABEL_ENDPOINT) + if 'secret' not in response_json: + raise BackendError( + f'Unexpected response from the service. Response: {response_json}') + return response_json['secret'] + + def get_gcloud_credential(self) -> str: + """Retrieves the Google Cloud SDK credential attached to the current + kernel. 
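+ The credential is stored as a user secret under the reserved label
+ "__gcloud_sdk_auth__"; a NotFoundError is raised if no such secret is
+ attached to the kernel.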
+ Example usage: + client = UserSecretsClient() + credential_json = client.get_gcloud_credential() + """ + try: + return self.get_secret("__gcloud_sdk_auth__") + except BackendError as backend_error: + message = str(backend_error.args) + if message.find('No user secrets exist') != -1: + raise NotFoundError('Google Cloud SDK credential not found.') + else: + raise + + def set_gcloud_credentials(self, project=None, account=None): + """Set user credentials attached to the current kernel and optionally the project & account name to the `gcloud` CLI. + + Example usage: + client = UserSecretsClient() + client.set_gcloud_credentials(project="my-gcp-project", account="me@my-org.com") + + !gcloud ai-platform jobs list + """ + creds = self.get_gcloud_credential() + creds_path = self._write_credentials_file(creds) + + subprocess.run(['gcloud', 'config', 'set', 'auth/credential_file_override', creds_path]) + + if project: + os.environ['GOOGLE_CLOUD_PROJECT'] = project + subprocess.run(['gcloud', 'config', 'set', 'project', project]) + + if account: + os.environ['GOOGLE_ACCOUNT'] = account + subprocess.run(['gcloud', 'config', 'set', 'account', account]) + + def set_tensorflow_credential(self, credential): + """Sets the credential for use by Tensorflow""" + + # Write to a local JSON credentials file + self._write_credentials_file(credential) + + def get_bigquery_access_token(self) -> Tuple[str, Optional[datetime]]: + """Retrieves BigQuery access token information from the UserSecrets service. + + This returns the token for the current kernel as well as its expiry (abs time) if it + is present. + Example usage: + client = UserSecretsClient() + token, expiry = client.get_bigquery_access_token() + """ + return self._get_access_token(GcpTarget.BIGQUERY) + + def _write_credentials_file(self, credentials) -> str: + adc_path = os.path.join(os.environ.get('HOME', '/'), 'gcloud_credential.json') + with open(adc_path, 'w') as f: + f.write(credentials) + os.environ['GOOGLE_APPLICATION_CREDENTIALS']=adc_path + + return adc_path + + def _get_gcs_access_token(self) -> Tuple[str, Optional[datetime]]: + return self._get_access_token(GcpTarget.GCS) + + def _get_cloudai_access_token(self) -> Tuple[str, Optional[datetime]]: + return self._get_access_token(GcpTarget.CLOUDAI) + + def _get_access_token(self, target: GcpTarget) -> Tuple[str, Optional[datetime]]: + request_body = { + 'Target': target.target + } + response_json = self.web_client.make_post_request(request_body, self.GET_USER_SECRET_ENDPOINT) + if 'secret' not in response_json: + raise BackendError( + f'Unexpected response from the service. Response: {response_json}') + # Optionally return expiry if it is set. + expiresInSeconds = response_json.get('expiresInSeconds') + expiry = datetime.utcnow() + timedelta(seconds=expiresInSeconds) if expiresInSeconds else None + return response_json['secret'], expiry diff --git a/patches/kaggle_session.py b/patches/kaggle_session.py new file mode 100644 index 00000000..30679c86 --- /dev/null +++ b/patches/kaggle_session.py @@ -0,0 +1,26 @@ +""" +This library adds support for retrieving data related to the current user session. +""" + +import os + +from kaggle_web_client import KaggleWebClient + + +class UserSessionClient(): + GET_SOURCE_ENDPOINT = '/requests/GetKernelRunSourceForCaipRequest' + + def __init__(self): + self.web_client = KaggleWebClient() + + def get_exportable_ipynb(self): + """Fetch the .ipynb source of the current notebook session. 
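+
+ The request is sent with 'UseDraft': True, so it is the current draft of
+ the notebook that is exported.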
+ + If Kaggle datasets are attached to the notebook, the source will + include an additonnal cell with logic to download the datasets + outside the Kaggle platform. + """ + request_body = { + 'UseDraft': True, + } + return self.web_client.make_post_request(request_body, self.GET_SOURCE_ENDPOINT) diff --git a/patches/kaggle_web_client.py b/patches/kaggle_web_client.py new file mode 100644 index 00000000..f7b7ae8b --- /dev/null +++ b/patches/kaggle_web_client.py @@ -0,0 +1,65 @@ +import json +import os +import socket +import urllib.request +from urllib.error import HTTPError, URLError + +_KAGGLE_DEFAULT_URL_BASE = "https://www.kaggle.com" +_KAGGLE_URL_BASE_ENV_VAR_NAME = "KAGGLE_URL_BASE" +_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME = "KAGGLE_USER_SECRETS_TOKEN" +_KAGGLE_IAP_TOKEN_ENV_VAR_NAME = "KAGGLE_IAP_TOKEN" +TIMEOUT_SECS = 40 + +class CredentialError(Exception): + pass + + +class BackendError(Exception): + pass + + +class KaggleWebClient: + + def __init__(self): + url_base_override = os.getenv(_KAGGLE_URL_BASE_ENV_VAR_NAME) + self.url_base = url_base_override or _KAGGLE_DEFAULT_URL_BASE + # Follow the OAuth 2.0 Authorization standard (https://tools.ietf.org/html/rfc6750) + self.jwt_token = os.getenv(_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME) + if self.jwt_token is None: + raise CredentialError( + 'A JWT Token is required to call Kaggle, ' + f'but none found in environment variable {_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME}') + self.headers = { + 'Content-type': 'application/json', + 'X-Kaggle-Authorization': f'Bearer {self.jwt_token}', + } + iap_token = os.getenv(_KAGGLE_IAP_TOKEN_ENV_VAR_NAME) + if iap_token: + self.headers['Authorization'] = f'Bearer {iap_token}' + + def make_post_request(self, data: dict, endpoint: str, timeout: int = TIMEOUT_SECS) -> dict: + url = f'{self.url_base}{endpoint}' + request_body = dict(data) + req = urllib.request.Request(url, headers=self.headers, data=bytes( + json.dumps(request_body), encoding="utf-8")) + try: + with urllib.request.urlopen(req, timeout=timeout) as response: + response_json = json.loads(response.read()) + if not response_json.get('wasSuccessful') or 'result' not in response_json: + raise BackendError( + f'Unexpected response from the service. Response: {response_json}.') + return response_json['result'] + except (URLError, socket.timeout) as e: + if isinstance( + e, socket.timeout) or isinstance( + e.reason, socket.timeout): + raise ConnectionError( + 'Timeout error trying to communicate with service. Please ensure internet is on.') from e + raise ConnectionError( + 'Connection error trying to communicate with service.') from e + except HTTPError as e: + if e.code == 401 or e.code == 403: + raise CredentialError( + f'Service responded with error code {e.code}.' + ' Please ensure you have access to the resource.') from e + raise BackendError('Unexpected response from the service.') from e diff --git a/patches/keras_internal.py b/patches/keras_internal.py new file mode 100644 index 00000000..e28127f9 --- /dev/null +++ b/patches/keras_internal.py @@ -0,0 +1,24 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Access to Keras function with a different internal and external path.""" + +from tf_keras.src.engine import data_adapter as _data_adapter +from tf_keras.src.models import Functional +from tf_keras.layers import DenseFeatures +from tf_keras.src.utils.dataset_creator import DatasetCreator + + +unpack_x_y_sample_weight = _data_adapter.unpack_x_y_sample_weight +get_data_handler = _data_adapter.get_data_handler diff --git a/patches/keras_internal_test.py b/patches/keras_internal_test.py new file mode 100644 index 00000000..edc33ec2 --- /dev/null +++ b/patches/keras_internal_test.py @@ -0,0 +1,23 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tensorflow as tf +from tensorflow_decision_forests.keras import keras_internal + + +# Does nothing. Ensures keras_internal can be loaded. + +if __name__ == "__main__": + tf.test.main() + diff --git a/patches/nbconvert-extensions.tpl b/patches/nbconvert-extensions.tpl index b3de090d..e40f0013 100644 --- a/patches/nbconvert-extensions.tpl +++ b/patches/nbconvert-extensions.tpl @@ -4,7 +4,7 @@ All cell metadata starting with '_kg_' will be included with its value ({key}-{v as a class in the cell's DIV container #} -{% extends 'full.tpl'%} +{% extends 'classic/index.html.j2'%} {% block any_cell %}
{{ super() }} diff --git a/patches/sitecustomize.py b/patches/sitecustomize.py index 7447f9cc..1bb8a1b6 100644 --- a/patches/sitecustomize.py +++ b/patches/sitecustomize.py @@ -1,22 +1,143 @@ -# TODO(rosbo): Remove this once we fix the issue with fastai importing older libcudnn if imported prior to tensorflow -import tensorflow +import logging import os -# Monkey patches BigQuery client creation to use proxy. -kaggle_proxy_token = os.getenv("KAGGLE_DATA_PROXY_TOKEN") -if kaggle_proxy_token: - from google.auth import credentials - from google.cloud import bigquery - from google.cloud.bigquery._http import Connection +import sys +import importlib.abc +import importlib +import importlib.machinery - Connection.API_BASE_URL = os.getenv("KAGGLE_DATA_PROXY_URL") - Connection._EXTRA_HEADERS["X-KAGGLE-PROXY-DATA"] = kaggle_proxy_token +import wrapt - bq_client = bigquery.Client - bigquery.Client = lambda *args, **kwargs: bq_client( - *args, - credentials=credentials.AnonymousCredentials(), - project=os.getenv("KAGGLE_DATA_PROXY_PROJECT"), - **kwargs) +class GcpModuleFinder(importlib.abc.MetaPathFinder): + _MODULES = [ + 'google.cloud.bigquery', + 'google.cloud.storage', + 'google.cloud.translate', + 'google.cloud.translate_v2', + 'google.cloud.translate_v3', + 'google.cloud.language', + 'google.cloud.language_v1', + 'google.cloud.videointelligence', + 'google.cloud.videointelligence_v1', + 'google.cloud.vision', + 'google.cloud.vision_v1', + ] + _KAGGLE_GCP_PATH = 'kaggle_gcp.py' + def __init__(self): + pass - credentials.AnonymousCredentials.refresh = lambda *args: None + def _is_called_from_kaggle_gcp(self): + import inspect + for frame in inspect.stack(): + if os.path.basename(frame.filename) == self._KAGGLE_GCP_PATH: + return True + return False + + def find_spec(self, fullname, path, target=None): + if fullname in self._MODULES: + # If being called from kaggle_gcp, don't return our + # monkeypatched module to avoid circular dependency, + # since we call kaggle_gcp to load the module. + if self._is_called_from_kaggle_gcp(): + return None + return importlib.machinery.ModuleSpec(fullname, GcpModuleLoader()) + + +class GcpModuleLoader(importlib.abc.Loader): + def __init__(self): + pass + + def create_module(self, spec): + """Create the gcp module from the spec. 
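+
+ The object returned is the already-initialized google.cloud submodule
+ produced by the matching kaggle_gcp.init_* function in the _LOADERS table
+ below, so exec_module does not need to do any further work.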
+ """ + import kaggle_gcp + _LOADERS = { + 'google.cloud.bigquery': kaggle_gcp.init_bigquery, + 'google.cloud.storage': kaggle_gcp.init_gcs, + 'google.cloud.translate': kaggle_gcp.init_translation_v3, + 'google.cloud.translate_v2': kaggle_gcp.init_translation_v2, + 'google.cloud.translate_v3': kaggle_gcp.init_translation_v3, + 'google.cloud.language': kaggle_gcp.init_natural_language, + 'google.cloud.language_v1': kaggle_gcp.init_natural_language, + 'google.cloud.videointelligence': kaggle_gcp.init_video_intelligence, + 'google.cloud.videointelligence_v1': kaggle_gcp.init_video_intelligence, + 'google.cloud.vision': kaggle_gcp.init_vision, + 'google.cloud.vision_v1': kaggle_gcp.init_vision + } + monkeypatch_gcp_module = _LOADERS[spec.name]() + return monkeypatch_gcp_module + + def exec_module(self, module): + pass + +if not hasattr(sys, 'frozen'): + sys.meta_path.insert(0, GcpModuleFinder()) + +@wrapt.when_imported('google.generativeai') +def post_import_logic(module): + if os.getenv('KAGGLE_DISABLE_GOOGLE_GENERATIVE_AI_INTEGRATION') != None: + return + if (os.getenv('KAGGLE_DATA_PROXY_TOKEN') == None or + os.getenv('KAGGLE_USER_SECRETS_TOKEN') == None or + (os.getenv('KAGGLE_DATA_PROXY_URL') == None and + os.getenv('KAGGLE_GRPC_DATA_PROXY_URL') == None)): + return + + old_configure = module.configure + + def new_configure(*args, **kwargs): + if ('default_metadata' in kwargs): + default_metadata = kwargs['default_metadata'] + else: + default_metadata = [] + default_metadata.append(("x-kaggle-proxy-data", os.environ['KAGGLE_DATA_PROXY_TOKEN'])) + user_secrets_token = os.environ['KAGGLE_USER_SECRETS_TOKEN'] + default_metadata.append(('x-kaggle-authorization', f'Bearer {user_secrets_token}')) + kwargs['default_metadata'] = default_metadata + + if ('client_options' in kwargs): + client_options = kwargs['client_options'] + else: + client_options = {} + + if os.getenv('KAGGLE_GOOGLE_GENERATIVE_AI_USE_REST_ONLY') != None: + kwargs['transport'] = 'rest' + + if 'transport' in kwargs and kwargs['transport'] == 'rest': + client_options['api_endpoint'] = os.environ['KAGGLE_DATA_PROXY_URL'] + client_options['api_endpoint'] += '/palmapi' + else: + client_options['api_endpoint'] = os.environ['KAGGLE_GRPC_DATA_PROXY_URL'] + kwargs['client_options'] = client_options + + old_configure(*args, **kwargs) + + module.configure = new_configure + module.configure() # generativeai can use GOOGLE_API_KEY env variable, so make sure we have the other configs set + +@wrapt.when_imported('google.genai') +def post_genai_import_logic(module): + if os.getenv('KAGGLE_DISABLE_GOOGLE_GENERATIVE_AI_INTEGRATION'): + return + + if not (os.getenv('KAGGLE_DATA_PROXY_TOKEN') and + os.getenv('KAGGLE_USER_SECRETS_TOKEN') and + os.getenv('KAGGLE_DATA_PROXY_URL')): + return + @wrapt.patch_function_wrapper(module, 'Client.__init__') + def init_wrapper(wrapped, instance, args, kwargs): + # Don't want to forward requests that are to Vertex AI, debug mode, or have their own http_options specified + # Thus, if the client constructor contains any params other than api_key, we don't set up forwarding + if any(value is not None for key, value in kwargs.items() if key != 'api_key'): + return wrapped(*args, **kwargs) + + default_metadata = { + "x-kaggle-proxy-data": os.environ['KAGGLE_DATA_PROXY_TOKEN'], + 'x-kaggle-authorization': f"Bearer {os.environ['KAGGLE_USER_SECRETS_TOKEN']}" + } + http_options = { + 'base_url': os.getenv('KAGGLE_DATA_PROXY_URL') + '/palmapi/', + 'headers': default_metadata + } + kwargs['http_options'] = http_options + 
return wrapped(*args, **kwargs) diff --git a/patches/template_conf.json b/patches/template_conf.json new file mode 100644 index 00000000..49cc88c2 --- /dev/null +++ b/patches/template_conf.json @@ -0,0 +1,13 @@ +{ + "base_template": "classic", + "mimetypes": { + "text/html": true + }, + "preprocessors": { + "100-pygments": { + "type": "nbconvert.preprocessors.CSSHTMLHeaderPreprocessor", + "enabled": true, + "style": "default" + } + } +} \ No newline at end of file diff --git a/patches/tensorboard/notebook.py b/patches/tensorboard/notebook.py new file mode 100644 index 00000000..7375f97a --- /dev/null +++ b/patches/tensorboard/notebook.py @@ -0,0 +1,62 @@ +"""%tensorboard line magic that patches TensorBoard's implementation to make use of Jupyter +TensorBoard server extension providing built-in proxying. + +Use: + %load_ext tensorboard.notebook + %tensorboard --logdir /logs +""" + +import argparse +import uuid + +from IPython.display import display, HTML, Javascript + +def _tensorboard_magic(line): + """Line magic function. + + Makes an AJAX call to the Jupyter TensorBoard server extension and outputs + an IFrame displaying the TensorBoard instance. + """ + parser = argparse.ArgumentParser() + parser.add_argument('--logdir', default='/kaggle/working') + args = parser.parse_args(line.split()) + + iframe_id = 'tensorboard-' + str(uuid.uuid4()) + + html = """ + + + + +""" % (args.logdir, iframe_id, iframe_id) + + display(HTML(html)) + +def load_ipython_extension(ipython): + """IPython extension entry point.""" + ipython.register_magic_function( + _tensorboard_magic, + magic_kind='line', + magic_name='tensorboard', + ) \ No newline at end of file diff --git a/push b/push index c3ebcaf4..124a3469 100755 --- a/push +++ b/push @@ -7,11 +7,14 @@ Usage: $0 [OPTIONS] [LABEL] Push a newly-built image with the given LABEL to gcr.io and DockerHub. Options: - -g, --gpu Push the image with GPU support. + -g, --gpu Push the image with GPU support. + -t, --tpu Push the image with GPU support. + -s, --source-image IMAGE Tag for the source image. EOF } -SOURCE_IMAGE='kaggle/python-build' +SOURCE_IMAGE_TAG='kaggle/python-build:latest' +SOURCE_IMAGE_TAG_OVERRIDE='' TARGET_IMAGE='gcr.io/kaggle-images/python' while :; do @@ -21,9 +24,22 @@ while :; do exit ;; -g|--gpu) - SOURCE_IMAGE='kaggle/python-gpu-build' + SOURCE_IMAGE_TAG='kaggle/python-gpu-build:latest' TARGET_IMAGE='gcr.io/kaggle-private-byod/python' ;; + -t|--tpu) + SOURCE_IMAGE_TAG='kaggle/python-tpuvm-build:latest' + TARGET_IMAGE='gcr.io/kaggle-private-byod/python-tpuvm' + ;; + -s|--source-image) + if [[ -z $2 ]]; then + usage + printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 + exit + fi + SOURCE_IMAGE_TAG_OVERRIDE=$2 + shift # skip the flag value + ;; -?*) usage printf 'ERROR: Unknown option: %s\n' "$1" >&2 @@ -38,16 +54,14 @@ done LABEL=${1:-testing} -readonly SOURCE_IMAGE +if [[ -n "$SOURCE_IMAGE_TAG_OVERRIDE" ]]; then + SOURCE_IMAGE_TAG="$SOURCE_IMAGE_TAG_OVERRIDE" +fi + +readonly SOURCE_IMAGE_TAG readonly TARGET_IMAGE readonly LABEL set -x -docker tag "${SOURCE_IMAGE}:latest" "${TARGET_IMAGE}:${LABEL}" +docker tag "${SOURCE_IMAGE_TAG}" "${TARGET_IMAGE}:${LABEL}" gcloud docker -- push "${TARGET_IMAGE}:${LABEL}" - -# Only CPU images are made public at this time. 
-if [[ "$LABEL" == "latest" && SOURCE_IMAGE = "kaggle/python-build" ]]; then - docker tag "${SOURCE_IMAGE}:latest" "kaggle/python:${LABEL}" - docker push "kaggle/python:${LABEL}" -fi diff --git a/renderizer/Dockerfile b/renderizer/Dockerfile new file mode 100644 index 00000000..9faac229 --- /dev/null +++ b/renderizer/Dockerfile @@ -0,0 +1,12 @@ +# Image used to generate the Dockerfiles from a Go text template. +# +# Build: +# docker build --rm --pull -t gcr.io/kaggle-images/go-renderizer -f Dockerfile . +# +# Push: +# docker push gcr.io/kaggle-images/go-renderizer +FROM golang:1.17 + +RUN go install github.com/gomatic/renderizer/v2/cmd/renderizer@v2.0.13 + +ENTRYPOINT ["renderizer"] \ No newline at end of file diff --git a/tensorflow-whl/CHANGELOG.md b/tensorflow-whl/CHANGELOG.md deleted file mode 100644 index 52ab3b13..00000000 --- a/tensorflow-whl/CHANGELOG.md +++ /dev/null @@ -1 +0,0 @@ -1.11.0-py36: Tensorflow 1.11.0 wheels built with python 3.6 diff --git a/tensorflow-whl/Dockerfile b/tensorflow-whl/Dockerfile deleted file mode 100644 index bcafa4fd..00000000 --- a/tensorflow-whl/Dockerfile +++ /dev/null @@ -1,107 +0,0 @@ -FROM nvidia/cuda:9.1-cudnn7-devel-ubuntu16.04 AS nvidia -FROM continuumio/anaconda3:5.0.1 - -# Avoid interactive configuration prompts/dialogs during apt-get. -ENV DEBIAN_FRONTEND=noninteractive - -# This is necessary to for apt to access HTTPS sources -RUN apt-get update && \ - apt-get install apt-transport-https - -# Cuda support -COPY --from=nvidia /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/ -COPY --from=nvidia /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/ -COPY --from=nvidia /etc/apt/trusted.gpg /etc/apt/trusted.gpg.d/cuda.gpg - -# Ensure the cuda libraries are compatible with the GPU image. -# TODO(b/120050292): Use templating to keep in sync. -ENV CUDA_VERSION=9.1.85 -ENV CUDA_PKG_VERSION=9-1=$CUDA_VERSION-1 -LABEL com.nvidia.volumes.needed="nvidia_driver" -LABEL com.nvidia.cuda.version="${CUDA_VERSION}" -ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -# The stub is useful to us both for built-time linking and run-time linking, on CPU-only systems. -# When intended to be used with actual GPUs, make sure to (besides providing access to the host -# CUDA user libraries, either manually or through the use of nvidia-docker) exclude them. One -# convenient way to do so is to obscure its contents by a bind mount: -# docker run .... -v /non-existing-directory:/usr/local/cuda/lib64/stubs:ro ... 
-ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs" -ENV NVIDIA_VISIBLE_DEVICES=all -ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility -ENV NVIDIA_REQUIRE_CUDA="cuda>=9.0" -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-cupti-$CUDA_PKG_VERSION \ - cuda-cudart-$CUDA_PKG_VERSION \ - cuda-cudart-dev-$CUDA_PKG_VERSION \ - cuda-libraries-$CUDA_PKG_VERSION \ - cuda-libraries-dev-$CUDA_PKG_VERSION \ - cuda-nvml-dev-$CUDA_PKG_VERSION \ - cuda-minimal-build-$CUDA_PKG_VERSION \ - cuda-command-line-tools-$CUDA_PKG_VERSION \ - libcudnn7=7.2.1.38-1+cuda9.0 \ - libcudnn7-dev=7.2.1.38-1+cuda9.0 \ - libnccl2=2.2.12-1+cuda9.1 \ - libnccl-dev=2.2.12-1+cuda9.1 && \ - ln -s /usr/local/cuda-9.1 /usr/local/cuda && \ - ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \ - rm -rf /var/lib/apt/lists/* - -# Install bazel -ENV BAZEL_VERSION=0.15.0 -RUN apt-get update && apt-get install -y python-software-properties zip && \ - echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" | tee -a /etc/apt/sources.list && \ - echo "deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu precise main" | tee -a /etc/apt/sources.list && \ - apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys EEA14886 C857C906 2B90D010 && \ - apt-get update && \ - echo debconf shared/accepted-oracle-license-v1-1 select true | debconf-set-selections && \ - echo debconf shared/accepted-oracle-license-v1-1 seen true | debconf-set-selections && \ - apt-get install -y oracle-java8-installer && \ - apt-get install -y --no-install-recommends \ - bash-completion \ - zlib1g-dev && \ - curl -LO "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel_${BAZEL_VERSION}-linux-x86_64.deb" && \ - dpkg -i bazel_*.deb && \ - rm bazel_*.deb - -# Tensorflow doesn't support python 3.7 yet. See https://github.com/tensorflow/tensorflow/issues/20517 -RUN conda install -y python=3.6.6 && \ - # Another fix for TF 1.10 https://github.com/tensorflow/tensorflow/issues/21518 - pip install keras_applications==1.0.4 --no-deps && \ - pip install keras_preprocessing==1.0.2 --no-deps - -# Fetch tensorflow -RUN cd /usr/local/src && \ - git clone https://github.com/tensorflow/tensorflow && \ - cd tensorflow && \ - git checkout r1.11 - -# Create a tensorflow wheel for CPU -RUN cd /usr/local/src/tensorflow && \ - cat /dev/null | ./configure && \ - bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package && \ - bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_cpu && \ - bazel clean - -# Create a tensorflow wheel for GPU/cuda -ENV TF_NEED_CUDA=1 -ENV TF_CUDA_VERSION=9.1 -ENV TF_CUDA_COMPUTE_CAPABILITIES=3.7,6.0 -ENV TF_CUDNN_VERSION=7 -ENV TF_NCCL_VERSION=2 -ENV NCCL_INSTALL_PATH=/usr/ - -RUN cd /usr/local/src/tensorflow && \ - # TF_NCCL_INSTALL_PATH is used for both libnccl.so.2 and libnccl.h. Make sure they are both accessible from the same directory. 
- ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/ && \ - cat /dev/null | ./configure && \ - echo "/usr/local/cuda-${TF_CUDA_VERSION}/targets/x86_64-linux/lib/stubs" > /etc/ld.so.conf.d/cuda-stubs.conf && ldconfig && \ - bazel build --config=opt \ - --config=cuda \ - --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ - //tensorflow/tools/pip_package:build_pip_package && \ - rm /etc/ld.so.conf.d/cuda-stubs.conf && ldconfig && \ - bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_gpu && \ - bazel clean - -# Print out the built .whl files -RUN ls -R /tmp/tensorflow* diff --git a/tensorflow-whl/README.md b/tensorflow-whl/README.md deleted file mode 100644 index 509591a2..00000000 --- a/tensorflow-whl/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# Build new Tensorflow wheels - -``` -./build -``` - -# Push the new wheels - -1. Add an entry in the [CHANGELOG](CHANGELOG.md) with an appropriate `LABEL`. -2. Push the new image using the `LABEL` you picked above. - - ``` - ./push LABEL - ``` - -# Use the new wheels - -Update the line below in the [CPU Dockerfile](../Dockerfile) and the [GPU Dockerfile](../gpu.Dockerfile) to use the new `LABEL`. - -``` -FROM gcr.io/kaggle-images/python-tensorflow-whl: