diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..e6990cd3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,23 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: bug, help wanted +assignees: '' +--- + +## 🐛 Bug + + + +### To Reproduce + + + +### Expected behavior + + + +### Additional context + + diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..d999a7b8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,19 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: enhancement +assignees: '' +--- + +## 🚀 Feature + + + +### Motivation + + + +### Additional context + + diff --git a/.gitignore b/.gitignore index 0d038d25..ef82380f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.pyc .idea/ .vscode -.mypy_cache \ No newline at end of file +.mypy_cache +.generated \ No newline at end of file diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index a99353d2..00000000 --- a/Dockerfile +++ /dev/null @@ -1,549 +0,0 @@ -ARG BASE_TAG=2019.03 - -FROM gcr.io/kaggle-images/python-tensorflow-whl:1.14.0-py36 as tensorflow_whl -FROM continuumio/anaconda3:${BASE_TAG} - -ARG GIT_COMMIT=unknown -ARG BUILD_DATE=unknown - -LABEL git-commit=$GIT_COMMIT -LABEL build-date=$BUILD_DATE - -# Correlate current release with the git hash inside the kernel editor by running `!cat /etc/git_commit`. -RUN echo "$GIT_COMMIT" > /etc/git_commit && echo "$BUILD_DATE" > /etc/build_date - -ADD clean-layer.sh /tmp/clean-layer.sh -ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl - -# This is necessary for apt to access HTTPS sources -RUN apt-get update && \ - apt-get install apt-transport-https && \ - /tmp/clean-layer.sh - - # Use a fixed apt-get repo to stop intermittent failures due to flaky httpredir connections, - # as described by Lionel Chan at http://stackoverflow.com/a/37426929/5881346 -RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list && \ - apt-get update && apt-get install -y build-essential unzip cmake && \ - # Work to upgrade to Python 3.7 can be found on this branch: https://github.com/Kaggle/docker-python/blob/upgrade-py37/Dockerfile - conda install -y python=3.6.6 && \ - pip install --upgrade pip && \ - /tmp/clean-layer.sh - -# The anaconda base image includes outdated versions of these packages. Update them to include the latest version. 
-RUN pip install seaborn python-dateutil dask && \ - pip install pyyaml joblib pytagcloud husl geopy ml_metrics mne pyshp && \ - pip install spacy && python -m spacy download en && python -m spacy download en_core_web_lg && \ - # The apt-get version of imagemagick is out of date and has compatibility issues, so we build from source - apt-get -y install dbus fontconfig fontconfig-config fonts-dejavu-core fonts-droid-fallback ghostscript gsfonts hicolor-icon-theme \ - libavahi-client3 libavahi-common-data libavahi-common3 libcairo2 libcap-ng0 libcroco3 \ - libcups2 libcupsfilters1 libcupsimage2 libdatrie1 libdbus-1-3 libdjvulibre-text libdjvulibre21 libfftw3-double3 libfontconfig1 \ - libfreetype6 libgdk-pixbuf2.0-0 libgdk-pixbuf2.0-common libgomp1 libgraphite2-3 libgs9 libgs9-common libharfbuzz0b libijs-0.35 \ - libilmbase12 libjbig0 libjbig2dec0 libjpeg62-turbo liblcms2-2 liblqr-1-0 libltdl7 libmagickcore-6.q16-3 \ - libmagickcore-6.q16-3-extra libmagickwand-6.q16-3 libnetpbm10 libopenexr22 libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 \ - libpaper-utils libpaper1 libpixman-1-0 libpng16-16 librsvg2-2 librsvg2-common libthai-data libthai0 libtiff5 libwmf0.2-7 \ - libxcb-render0 libxcb-shm0 netpbm poppler-data p7zip-full python3-rtree && \ - cd /usr/local/src && \ - wget --no-verbose https://imagemagick.org/download/ImageMagick.tar.gz && \ - tar xzf ImageMagick.tar.gz && cd `ls -d ImageMagick-*` && pwd && ls -al && ./configure && \ - make -j $(nproc) && make install && \ - /tmp/clean-layer.sh - -# Install tensorflow from a pre-built wheel -COPY --from=tensorflow_whl /tmp/tensorflow_cpu/*.whl /tmp/tensorflow_cpu/ -RUN pip install /tmp/tensorflow_cpu/tensorflow*.whl && \ - rm -rf /tmp/tensorflow_cpu && \ - /tmp/clean-layer.sh - -RUN apt-get install -y libfreetype6-dev && \ - apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing && \ - pip install gensim && \ - pip install textblob && \ - pip install wordcloud && \ - conda install -y -c conda-forge python-igraph && \ - pip install xgboost && \ - pip install lightgbm && \ - pip install git+git://github.com/Lasagne/Lasagne.git && \ - pip install keras && \ - pip install keras-rl && \ - #keras-rcnn - pip install git+https://github.com/broadinstitute/keras-rcnn && \ - # version 3.7.1 adds a dependency on entrypoints > 3. This causes a reinstall but fails because - # it is a distutils package and can't be uninstalled. Once the anaconda image in updated, this - # pin should be removed. - pip install flake8==3.6.0 && \ - #neon - cd /usr/local/src && \ - git clone --depth 1 https://github.com/NervanaSystems/neon.git && \ - cd neon && pip install . 
&& \ - #nolearn - cd /usr/local/src && mkdir nolearn && cd nolearn && \ - git clone --depth 1 https://github.com/dnouri/nolearn.git && cd nolearn && \ - echo "x" > README.rst && echo "x" > CHANGES.rst && \ - python setup.py install && \ - # Dev branch of Theano - pip install git+git://github.com/Theano/Theano.git --upgrade --no-deps && \ - # put theano compiledir inside /tmp (it needs to be in writable dir) - printf "[global]\nbase_compiledir = /tmp/.theano\n" > /.theanorc && \ - cd /usr/local/src && git clone --depth 1 https://github.com/pybrain/pybrain && \ - cd pybrain && python setup.py install && \ - # Base ATLAS - apt-get install -y libatlas-base-dev && \ - cd /usr/local/src && git clone --depth 1 https://github.com/ztane/python-Levenshtein && \ - cd python-Levenshtein && python setup.py install && \ - pip install hep_ml && \ - # chainer - pip install chainer && \ - # NLTK Project datasets - mkdir -p /usr/share/nltk_data && \ - # NLTK Downloader no longer continues smoothly after an error, so we explicitly list - # the corpuses that work - # "yes | ..." answers yes to the retry prompt in case of an error. See b/133762095. - yes | python -m nltk.downloader -d /usr/share/nltk_data abc alpino averaged_perceptron_tagger \ - basque_grammars biocreative_ppi bllip_wsj_no_aux \ - book_grammars brown brown_tei cess_cat cess_esp chat80 city_database cmudict \ - comtrans conll2000 conll2002 conll2007 crubadan dependency_treebank \ - europarl_raw floresta gazetteers genesis gutenberg \ - ieer inaugural indian jeita kimmo knbc large_grammars lin_thesaurus mac_morpho machado \ - masc_tagged maxent_ne_chunker maxent_treebank_pos_tagger moses_sample movie_reviews \ - mte_teip5 names nps_chat omw opinion_lexicon paradigms \ - pil pl196x porter_test ppattach problem_reports product_reviews_1 product_reviews_2 propbank \ - pros_cons ptb punkt qc reuters rslp rte sample_grammars semcor senseval sentence_polarity \ - sentiwordnet shakespeare sinica_treebank smultron snowball_data spanish_grammars \ - state_union stopwords subjectivity swadesh switchboard tagsets timit toolbox treebank \ - twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \ - vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe && \ - # Stop-words - pip install stop-words && \ - pip install --upgrade scikit-image && \ - /tmp/clean-layer.sh - -# Make sure the dynamic linker finds the right libstdc++ -ENV LD_LIBRARY_PATH=/opt/conda/lib - -RUN apt-get -y install zlib1g-dev liblcms2-dev libwebp-dev libgeos-dev && \ - pip install matplotlib && \ - pip install pyshp && \ - pip install pyproj && \ - conda install basemap && \ - # sasl is apparently an ibis dependency - apt-get -y install libsasl2-dev && \ - # ...as is psycopg2 - apt-get install -y libpq-dev && \ - pip install ibis-framework && \ - # Cartopy plus dependencies - yes | conda install proj4 && \ - pip install packaging && \ - pip install shapely && \ - pip install cartopy && \ - # MXNet - pip install mxnet && \ - pip install --upgrade numpy && \ - pip install gluonnlp && \ - pip install gluoncv && \ - # h2o (requires java) - # requires java - apt-get install -y default-jdk && \ - cd /usr/local/src && mkdir h2o && cd h2o && \ - wget --no-verbose http://h2o-release.s3.amazonaws.com/h2o/latest_stable -O latest && \ - wget --no-verbose --no-check-certificate -i latest -O h2o.zip && rm latest && \ - unzip h2o.zip && rm h2o.zip && cp h2o-*/h2o.jar . && \ - pip install `find . 
-name "*whl"` && \ - /tmp/clean-layer.sh - -# b/128333086: Set PROJ_LIB to points to the proj4 cartographic library. -ENV PROJ_LIB=/opt/conda/share/proj - -# scikit-learn dependencies -RUN pip install scipy && \ - pip install scikit-learn && \ - # HDF5 support - pip install h5py && \ - pip install biopython && \ - # PUDB, for local debugging convenience - pip install pudb && \ - pip install imbalanced-learn && \ - # Convex Optimization library - # Latest version fails to install, see https://github.com/cvxopt/cvxopt/issues/77 - # and https://github.com/cvxopt/cvxopt/issues/80 - # pip install cvxopt && \ - # Profiling and other utilities - pip install line_profiler && \ - pip install orderedmultidict && \ - pip install smhasher && \ - pip install bokeh && \ - # b/134599839: latest version requires llvmlite >= 0.39.0. Base image comes with 0.38.0. - # It fails to reinstall it because it is a distutil package. Remove pin once base image include newer verson of llvmlite. - pip install numba==0.38.0 && \ - pip install datashader && \ - # Boruta (python implementation) - pip install Boruta && \ - cd /usr/local/src && git clone git://github.com/nicolashennetier/pyeconometrics.git && \ - cd pyeconometrics && python setup.py install && \ - apt-get install -y graphviz && pip install graphviz && \ - # Pandoc is a dependency of deap - apt-get install -y pandoc && \ - pip install git+git://github.com/scikit-learn-contrib/py-earth.git@issue191 && \ - pip install essentia && \ - # PyTorch - export CXXFLAGS="-std=c++11" && \ - export CFLAGS="-std=c99" && \ - conda install -y pytorch-cpu torchvision-cpu -c pytorch && \ - # PyTorch Audio - apt-get install -y sox libsox-dev libsox-fmt-all && \ - pip install cffi && \ - pip install git+git://github.com/pytorch/audio.git && \ - /tmp/clean-layer.sh - -# vtk with dependencies -RUN apt-get install -y libgl1-mesa-glx && \ - pip install vtk && \ - # xvfbwrapper with dependencies - apt-get install -y xvfb && \ - pip install xvfbwrapper && \ - /tmp/clean-layer.sh - -RUN pip install mpld3 && \ - pip install mplleaflet && \ - pip install gpxpy && \ - pip install arrow && \ - pip install nilearn && \ - pip install nibabel && \ - pip install pronouncing && \ - pip install markovify && \ - pip install rf_perm_feat_import && \ - pip install imgaug && \ - pip install preprocessing && \ - pip install Baker && \ - pip install path.py && \ - pip install Geohash && \ - # https://github.com/vinsci/geohash/issues/4 - sed -i -- 's/geohash/.geohash/g' /opt/conda/lib/python3.6/site-packages/Geohash/__init__.py && \ - pip install deap && \ - pip install tpot && \ - pip install scikit-optimize && \ - pip install haversine && \ - pip install toolz cytoolz && \ - pip install sacred && \ - pip install plotly && \ - pip install git+https://github.com/nicta/dora.git && \ - pip install git+https://github.com/hyperopt/hyperopt.git && \ - # tflean. Deep learning library featuring a higher-level API for TensorFlow. http://tflearn.org - pip install git+https://github.com/tflearn/tflearn.git && \ - pip install fitter && \ - pip install langid && \ - # Delorean. 
Useful for dealing with datetime - pip install delorean && \ - pip install trueskill && \ - pip install heamy && \ - pip install vida && \ - # Useful data exploration libraries (for missing data and generating reports) - pip install missingno && \ - pip install pandas-profiling && \ - pip install s2sphere && \ - pip install git+https://github.com/fmfn/BayesianOptimization.git && \ - pip install matplotlib-venn && \ - pip install pyldavis && \ - pip install mlxtend && \ - pip install altair && \ - pip install pystan && \ - pip install ImageHash && \ - pip install ecos && \ - pip install CVXcanon && \ - pip install fancyimpute && \ - pip install git+https://github.com/pymc-devs/pymc3 && \ - pip install tifffile && \ - pip install spectral && \ - pip install descartes && \ - pip install geojson && \ - pip install pysal && \ - pip install pyflux && \ - pip install terminalplot && \ - pip install raccoon && \ - pip install pydicom && \ - pip install wavio && \ - pip install SimpleITK && \ - pip install hmmlearn && \ - pip install bayespy && \ - pip install gplearn && \ - pip install PyAstronomy && \ - pip install squarify && \ - pip install fuzzywuzzy && \ - pip install python-louvain && \ - pip install pyexcel-ods && \ - pip install sklearn-pandas && \ - pip install stemming && \ - pip install fbprophet && \ - pip install holoviews && \ - # 1.6.2 is not currently supported by the version of matplotlib we are using. - # See other comments about why matplotlib is pinned. - pip install geoviews==1.6.1 && \ - pip install hypertools && \ - pip install py_stringsimjoin && \ - pip install speedml && \ - pip install nibabel && \ - pip install mlens && \ - pip install scikit-multilearn && \ - pip install cleverhans && \ - pip install leven && \ - pip install catboost && \ - #cd /usr/local/src && git clone --depth=1 https://github.com/AxeldeRomblay/MLBox && cd MLBox/python-package && python setup.py install && \ - pip install fastFM && \ - pip install lightfm && \ - pip install paramnb && \ - pip install folium && \ - pip install scikit-plot && \ - # dipy requires the optional fury dependency for visualizations. - pip install fury dipy && \ - # plotnine 0.5 is depending on matplotlib >= 3.0 which is not compatible with basemap. - # once basemap support matplotlib, we can unpin this package. 
- pip install plotnine==0.4.0 && \ - pip install git+https://github.com/dvaida/hallucinate.git && \ - pip install scikit-surprise && \ - pip install pymongo && \ - pip install edward && \ - pip install geoplot && \ - pip install eli5 && \ - pip install implicit && \ - pip install dask-ml[xgboost] && \ - /tmp/clean-layer.sh - -RUN pip install kmeans-smote --no-dependencies && \ - # Add google PAIR-code Facets - cd /opt/ && git clone https://github.com/PAIR-code/facets && cd facets/ && jupyter nbextension install facets-dist/ --user && \ - export PYTHONPATH=$PYTHONPATH:/opt/facets/facets_overview/python/ && \ - pip install --no-dependencies ethnicolr && \ - pip install tensorpack && \ - pip install pycountry && pip install iso3166 && \ - pip install pydash && \ - pip install kmodes --no-dependencies && \ - pip install librosa && \ - pip install polyglot && \ - pip install mmh3 && \ - pip install fbpca && \ - pip install sentencepiece && \ - pip install cufflinks && \ - pip install glmnet_py && \ - pip install lime && \ - pip install memory_profiler && \ - /tmp/clean-layer.sh - -# install cython & cysignals before pyfasttext -RUN pip install --upgrade cython && \ - pip install --upgrade cysignals && \ - pip install pyfasttext && \ - pip install ktext && \ - pip install fasttext && \ - apt-get install -y libhunspell-dev && pip install hunspell && \ - # b/138723119: annoy's latest version 1.16 was failing - pip install annoy==1.15.2 && \ - # Need to use CountEncoder from category_encoders before it's officially released - pip install git+https://github.com/scikit-learn-contrib/categorical-encoding.git && \ - pip install google-cloud-automl && \ - # Newer version crashes (latest = 1.14.0) when running tensorflow. - # python -c "from google.cloud import bigquery; import tensorflow". This flow is common because bigquery is imported in kaggle_gcp.py - # which is loaded at startup. 
- pip install google-cloud-bigquery==1.12.1 && \ - pip install google-cloud-storage && \ - pip install ortools && \ - pip install scattertext && \ - # Pandas data reader - pip install pandas-datareader && \ - pip install pykoko && \ - pip install wordsegment && \ - pip install pyahocorasick && \ - pip install wordbatch && \ - pip install emoji && \ - # Add Japanese morphological analysis engine - pip install janome && \ - pip install wfdb && \ - pip install vecstack && \ - pip install sklearn-contrib-lightning && \ - # yellowbrick machine learning visualization library - pip install yellowbrick && \ - pip install mlcrate && \ - # Required to display Altair charts in Jupyter notebook - pip install vega3 && \ - jupyter nbextension install --sys-prefix --py vega3 && \ - /tmp/clean-layer.sh - -# Fast.ai and dependencies -RUN pip install bcolz && \ - pip install bleach && \ - pip install certifi && \ - pip install cycler && \ - pip install decorator && \ - pip install entrypoints && \ - pip install html5lib && \ - pip install ipykernel && \ - pip install ipython && \ - pip install ipython-genutils && \ - pip install ipywidgets && \ - pip install isoweek && \ - pip install jedi && \ - pip install Jinja2 && \ - pip install jsonschema && \ - pip install jupyter && \ - pip install jupyter-client && \ - pip install jupyter-console && \ - pip install jupyter-core && \ - pip install MarkupSafe && \ - pip install mistune && \ - pip install nbconvert && \ - pip install nbformat && \ - pip install notebook==5.5.0 && \ - pip install olefile && \ - pip install opencv-python && \ - # b/124184516: tsfresh is not yet compatible with pandas 0.24.0 - pip install pandas==0.23.4 && \ - pip install pandas_summary && \ - pip install pandocfilters && \ - pip install pexpect && \ - pip install pickleshare && \ - pip install Pillow && \ - pip install ptyprocess && \ - pip install Pygments && \ - pip install pyparsing && \ - pip install pytz && \ - pip install PyYAML && \ - pip install pyzmq && \ - pip install qtconsole && \ - pip install simplegeneric && \ - pip install six && \ - pip install terminado && \ - pip install testpath && \ - # Latest version (6.0) of tornado breaks Jupyter notebook: - # https://github.com/jupyter/notebook/issues/4439 - pip install tornado==5.0.2 && \ - pip install tqdm && \ - pip install traitlets && \ - pip install wcwidth && \ - pip install webencodings && \ - pip install widgetsnbextension && \ - # Latest version of pyarrow conflicts with pandas - # https://github.com/pandas-dev/pandas/issues/23053 - pip install pyarrow==0.10.0 && \ - pip install feather-format && \ - pip install fastai && \ - pip install torchtext && \ - /tmp/clean-layer.sh - -# allennlp and dependencies -# TODO: install deps when underlying dependency is fixed. https://github.com/Kaggle/docker-python/issues/548 -RUN pip install jsonnet overrides tensorboardX && \ - pip install flask>=1.0.2 flask-cors>=3.0.7 gevent>=1.3.6 && \ - pip install unidecode parsimonious>=0.8.0 sqlparse>=0.2.4 word2number>=1.1 && \ - pip install pytorch-pretrained-bert>=0.6.0 jsonpickle && \ - pip install requests>=2.18 editdistance conllu==0.11 && \ - pip install --no-dependencies allennlp && \ - /tmp/clean-layer.sh - - ########### - # - # NEW CONTRIBUTORS: - # Please add new pip/apt installs in this block. Don't forget a "&& \" at the end - # of all non-final lines. Thanks! 
- # - ########### - -RUN pip install flashtext && \ - pip install marisa-trie && \ - pip install pyemd && \ - pip install pyupset && \ - pip install pympler && \ - pip install s3fs && \ - pip install featuretools && \ - pip install -e git+https://github.com/SohierDane/BigQuery_Helper#egg=bq_helper && \ - pip install hpsklearn && \ - pip install keras-tqdm && \ - pip install git+https://github.com/Kaggle/learntools && \ - pip install kmapper && \ - pip install shap && \ - pip install ray && \ - pip install gym && \ - pip install tensorforce && \ - pip install pyarabic && \ - pip install conx && \ - pip install pandasql && \ - pip install trackml && \ - pip install tensorflow_hub && \ - pip install jieba && \ - pip install git+https://github.com/SauceCat/PDPbox && \ - pip install ggplot && \ - pip install cesium && \ - pip install rgf_python && \ - # b/124184516: latest version forces the use of incompatible pandas>0.24 - pip install pytext-nlp==0.1.2 && \ - pip install tsfresh && \ - pip install pymagnitude && \ - pip install pykalman && \ - pip install optuna && \ - pip install chainercv && \ - pip install chainer-chemistry && \ - pip install plotly_express && \ - pip install albumentations && \ - pip install rtree && \ - pip install osmnx && \ - apt-get -y install libspatialindex-dev && \ - pip install pytorch-ignite && \ - /tmp/clean-layer.sh - -# Tesseract and some associated utility packages -RUN apt-get install tesseract-ocr -y && \ - pip install pytesseract && \ - pip install wand==0.5.3 && \ - pip install pdf2image && \ - pip install PyPDF && \ - pip install pyocr && \ - /tmp/clean-layer.sh -ENV TESSERACT_PATH=/usr/bin/tesseract - -# Pin Vowpal Wabbit v8.6.0 because 8.6.1 does not build or install successfully -RUN cd /usr/local/src && \ - git clone -b 8.6.0 https://github.com/JohnLangford/vowpal_wabbit.git && \ - ./vowpal_wabbit/python/conda_install.sh && \ - # Reinstall in non-editable mode (without the -e flag) - pip install vowpal_wabbit/python && \ - /tmp/clean-layer.sh - -# For Facets -ENV PYTHONPATH=$PYTHONPATH:/opt/facets/facets_overview/python/ -# For Theano with MKL -ENV MKL_THREADING_LAYER=GNU - -# Temporary fixes and patches - # Temporary patch for Dask getting downgraded, which breaks Keras -RUN pip install --upgrade dask && \ - # Stop jupyter nbconvert trying to rewrite its folder hierarchy - mkdir -p /root/.jupyter && touch /root/.jupyter/jupyter_nbconvert_config.py && touch /root/.jupyter/migrated && \ - mkdir -p /.jupyter && touch /.jupyter/jupyter_nbconvert_config.py && touch /.jupyter/migrated && \ - # Stop Matplotlib printing junk to the console on first load - sed -i "s/^.*Matplotlib is building the font cache using fc-list.*$/# Warning removed by Kaggle/g" /opt/conda/lib/python3.6/site-packages/matplotlib/font_manager.py && \ - # Make matplotlib output in Jupyter notebooks display correctly - mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/ipython_config.py && \ - /tmp/clean-layer.sh - -# gcloud SDK https://cloud.google.com/sdk/docs/quickstart-debian-ubuntu -RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.cloud.google.com/apt cloud-sdk main" \ - | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \ - curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | \ - apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \ - apt-get update -y && apt-get install google-cloud-sdk -y && \ - /tmp/clean-layer.sh - -# Add BigQuery client proxy settings -ENV PYTHONUSERBASE 
"/root/.local" -ADD patches/kaggle_gcp.py /root/.local/lib/python3.6/site-packages/kaggle_gcp.py -ADD patches/kaggle_secrets.py /root/.local/lib/python3.6/site-packages/kaggle_secrets.py -ADD patches/log.py /root/.local/lib/python3.6/site-packages/log.py -ADD patches/sitecustomize.py /root/.local/lib/python3.6/site-packages/sitecustomize.py - -# TensorBoard Jupyter extension. Should be replaced with TensorBoard's provided magic once we have -# worker tunneling support in place. -ENV JUPYTER_CONFIG_DIR "/root/.jupyter/" -RUN pip install jupyter_tensorboard && \ - jupyter serverextension enable jupyter_tensorboard && \ - jupyter tensorboard enable -ADD patches/tensorboard/notebook.py /opt/conda/lib/python3.6/site-packages/tensorboard/notebook.py - -# Set backend for matplotlib -ENV MPLBACKEND "agg" diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl new file mode 100644 index 00000000..9d661201 --- /dev/null +++ b/Dockerfile.tmpl @@ -0,0 +1,164 @@ +{{ if eq .Accelerator "gpu" }} +FROM us-docker.pkg.dev/colab-images/public/runtime:release-colab-external_20260126-060048_RC00 +{{ else }} +FROM us-docker.pkg.dev/colab-images/public/cpu-runtime:release-colab-external_20260123-060023_RC00 +{{ end}} + +ADD kaggle_requirements.txt /kaggle_requirements.txt + +# Freeze existing requirements from base image for critical packages: +RUN pip freeze | grep -E 'tensorflow|keras|torch|jax' > /colab_requirements.txt + +# Merge requirements files: +RUN cat /colab_requirements.txt >> /requirements.txt +RUN cat /kaggle_requirements.txt >> /requirements.txt + +# Install Kaggle packages +RUN uv pip install --system --no-cache -r /requirements.txt + +# Install manual packages: +# b/183041606#comment5: the Kaggle data proxy doesn't support these APIs. If the library is missing, it falls back to using a regular BigQuery query to fetch data. +RUN uv pip uninstall --system --no-cache google-cloud-bigquery-storage + +# uv cannot install this in requirements.txt without --no-build-isolation +# to avoid affecting the larger build, we'll post-install it. +RUN uv pip install --no-build-isolation --no-cache --system "git+https://github.com/Kaggle/learntools" + +# b/404590350: Ray and torchtune have conflicting cli named `tune`. `ray` is not part of Colab's base image. Re-install `tune` to ensure the torchtune CLI is available by default. +# b/468367647: Unpin protobuf, version greater than v5.29.5 causes issues with numerous packages +RUN uv pip install --system --force-reinstall --no-cache --no-deps torchtune +RUN uv pip install --system --force-reinstall --no-cache "protobuf==5.29.5" + +# Adding non-package dependencies: +ADD clean-layer.sh /tmp/clean-layer.sh +ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl +ADD patches/template_conf.json /opt/kaggle/conf.json + +ARG PACKAGE_PATH=/usr/local/lib/python3.12/dist-packages + +# Install GPU-specific non-pip packages. 
+{{ if eq .Accelerator "gpu" }} +RUN uv pip install --system --no-cache "pycuda" +{{ end }} + + +# Use a fixed apt-get repo to stop intermittent failures due to flaky httpredir connections, +# as described by Lionel Chan at http://stackoverflow.com/a/37426929/5881346 +RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list && \ + apt-get update --allow-releaseinfo-change && \ + # Needed by lightGBM (GPU build) + # https://lightgbm.readthedocs.io/en/latest/GPU-Tutorial.html#build-lightgbm + apt-get install -y build-essential unzip cmake libboost-dev libboost-system-dev libboost-filesystem-dev p7zip-full && \ + # b/182601974: ssh client was removed from the base image but is required for packages such as stable-baselines. + apt-get install -y openssh-client && \ + apt-get install -y graphviz && pip install graphviz && \ + /tmp/clean-layer.sh + +ADD patches/keras_internal.py \ + patches/keras_internal_test.py \ + $PACKAGE_PATH/tensorflow_decision_forests/keras/ + +RUN apt-get install -y libfreetype6-dev && \ + apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing && \ + /tmp/clean-layer.sh + +RUN mkdir -p /usr/share/nltk_data && \ + # NLTK Downloader no longer continues smoothly after an error, so we explicitly list + # the corpuses that work + python -m nltk.downloader -d /usr/share/nltk_data abc alpino averaged_perceptron_tagger \ + basque_grammars biocreative_ppi bllip_wsj_no_aux \ + book_grammars brown brown_tei cess_cat cess_esp chat80 city_database cmudict \ + comtrans conll2000 conll2002 conll2007 crubadan dependency_treebank \ + europarl_raw floresta gazetteers genesis gutenberg \ + ieer inaugural indian jeita kimmo knbc large_grammars lin_thesaurus mac_morpho machado \ + masc_tagged maxent_ne_chunker maxent_treebank_pos_tagger moses_sample movie_reviews \ + mte_teip5 names nps_chat omw opinion_lexicon paradigms \ + pil pl196x porter_test ppattach problem_reports product_reviews_1 product_reviews_2 propbank \ + pros_cons ptb punkt punkt_tab qc reuters rslp rte sample_grammars semcor senseval sentence_polarity \ + sentiwordnet shakespeare sinica_treebank smultron snowball_data spanish_grammars \ + state_union stopwords subjectivity swadesh switchboard tagsets timit toolbox treebank \ + twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \ + vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe + +RUN apt-get install -y git-lfs && \ + # vtk dependencies + apt-get install -y libgl1-mesa-glx && \ + # xvfbwrapper dependencies + apt-get install -y xvfb && \ + /tmp/clean-layer.sh + +# Download base easyocr models. 
+# https://github.com/JaidedAI/EasyOCR#usage +RUN mkdir -p /root/.EasyOCR/model && \ + wget --no-verbose "https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/latin_g2.zip" -O /root/.EasyOCR/model/latin.zip && \ + unzip /root/.EasyOCR/model/latin.zip -d /root/.EasyOCR/model/ && \ + rm /root/.EasyOCR/model/latin.zip && \ + wget --no-verbose "https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/english_g2.zip" -O /root/.EasyOCR/model/english.zip && \ + unzip /root/.EasyOCR/model/english.zip -d /root/.EasyOCR/model/ && \ + rm /root/.EasyOCR/model/english.zip && \ + wget --no-verbose "https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip" -O /root/.EasyOCR/model/craft_mlt_25k.zip && \ + unzip /root/.EasyOCR/model/craft_mlt_25k.zip -d /root/.EasyOCR/model/ && \ + rm /root/.EasyOCR/model/craft_mlt_25k.zip && \ + /tmp/clean-layer.sh + +# Tesseract and some associated utility packages +RUN apt-get install tesseract-ocr -y && \ + /tmp/clean-layer.sh + +ENV TESSERACT_PATH=/usr/bin/tesseract \ + # For Facets, we also include an empty path to include $PWD. + PYTHONPATH=:$PYTHONPATH:/opt/facets/facets_overview/python/ \ + # For Theano with MKL + MKL_THREADING_LAYER=GNU + +# Temporary fixes and patches +# Stop jupyter nbconvert trying to rewrite its folder hierarchy +RUN mkdir -p /root/.jupyter && touch /root/.jupyter/jupyter_nbconvert_config.py && touch /root/.jupyter/migrated && \ + mkdir -p /.jupyter && touch /.jupyter/jupyter_nbconvert_config.py && touch /.jupyter/migrated && \ + # Make matplotlib output in Jupyter notebooks display correctly + mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/ipython_config.py && \ + /tmp/clean-layer.sh + +# install imagemagick for wand +# https://docs.wand-py.org/en/latest/guide/install.html#install-imagemagick-on-debian-ubuntu +RUN apt-get install libmagickwand-dev && \ + /tmp/clean-layer.sh + +# Override default imagemagick policies +ADD patches/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml + +# Add Kaggle module resolver +ADD patches/kaggle_module_resolver.py $PACKAGE_PATH/tensorflow_hub/kaggle_module_resolver.py +RUN sed -i '/from tensorflow_hub import uncompressed_module_resolver/a from tensorflow_hub import kaggle_module_resolver' $PACKAGE_PATH/tensorflow_hub/config.py && \ + sed -i '/_install_default_resolvers()/a \ \ registry.resolver.add_implementation(kaggle_module_resolver.KaggleFileResolver())' $PACKAGE_PATH/tensorflow_hub/config.py + +# Add BigQuery client proxy settings +ENV PYTHONUSERBASE="/root/.local" +ADD patches/kaggle_gcp.py \ + patches/kaggle_secrets.py \ + patches/kaggle_session.py \ + patches/kaggle_web_client.py \ + patches/kaggle_datasets.py \ + $PACKAGE_PATH/ + +# Figure out why this is in a different place? +# Found by doing a export PYTHONVERBOSE=1 and then running python and checking for where it looked for it. +ADD patches/sitecustomize.py /usr/lib/python3.12/sitecustomize.py + +ARG GIT_COMMIT=unknown \ + BUILD_DATE=unknown + +LABEL git-commit=$GIT_COMMIT \ + build-date=$BUILD_DATE + +ENV GIT_COMMIT=${GIT_COMMIT} \ + BUILD_DATE=${BUILD_DATE} + +# Correlate current release with the git hash inside the kernel editor by running `!cat /etc/git_commit`. +RUN echo "$GIT_COMMIT" > /etc/git_commit && echo "$BUILD_DATE" > /etc/build_date + +{{ if eq .Accelerator "gpu" }} +# Add the CUDA home. 
+ENV CUDA_HOME=/usr/local/cuda +{{ end }} +ENTRYPOINT ["/usr/bin/env"] diff --git a/Jenkinsfile b/Jenkinsfile index 512b04a2..c4af03e6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,4 +1,4 @@ -String cron_string = BRANCH_NAME == "master" ? "H 12 * * 1,3,5" : "" +String cron_string = BRANCH_NAME == "main" ? "H 12 * * 1-5" : "" // Mon-Fri at noon UTC, 8am EST, 5am PDT pipeline { agent { label 'ephemeral-linux' } @@ -14,138 +14,186 @@ pipeline { GIT_COMMIT_SUBJECT = sh(returnStdout: true, script:"git log --format=%s -n 1 HEAD").trim() GIT_COMMIT_AUTHOR = sh(returnStdout: true, script:"git log --format='%an' -n 1 HEAD").trim() GIT_COMMIT_SUMMARY = "`` ${GIT_COMMIT_SUBJECT} - ${GIT_COMMIT_AUTHOR}" - SLACK_CHANNEL = sh(returnStdout: true, script: "if [[ \"${GIT_BRANCH}\" == \"master\" ]]; then echo \"#kernelops\"; else echo \"#builds\"; fi").trim() - PRETEST_TAG = sh(returnStdout: true, script: "if [[ \"${GIT_BRANCH}\" == \"master\" ]]; then echo \"ci-pretest\"; else echo \"${GIT_BRANCH}-pretest\"; fi").trim() - STAGING_TAG = sh(returnStdout: true, script: "if [[ \"${GIT_BRANCH}\" == \"master\" ]]; then echo \"staging\"; else echo \"${GIT_BRANCH}-staging\"; fi").trim() + MATTERMOST_CHANNEL = sh(returnStdout: true, script: "if [[ \"${GIT_BRANCH}\" == \"main\" ]]; then echo \"#kernelops\"; else echo \"#builds\"; fi").trim() + // Use dev branch names as tags, but replace '/' with '-' using sed since docker images don't support forward slash + PRETEST_TAG = sh(returnStdout: true, script: "if [[ \"${GIT_BRANCH}\" == \"main\" ]]; then echo \"ci-pretest\"; else echo \"${GIT_BRANCH}-pretest\" | sed 's/\\//-/g'; fi").trim() + STAGING_TAG = sh(returnStdout: true, script: "if [[ \"${GIT_BRANCH}\" == \"main\" ]]; then echo \"staging\"; else echo \"${GIT_BRANCH}-staging\" | sed 's/\\//-/g'; fi").trim() } stages { - stage('Docker CPU Build') { - steps { - sh '''#!/bin/bash - set -exo pipefail - - ./build | ts - ''' - } - } - - stage('Push CPU Pretest Image') { - steps { - sh '''#!/bin/bash - set -exo pipefail - - date - ./push ${PRETEST_TAG} - ''' - } - } - - stage('Test CPU Image') { - steps { - sh '''#!/bin/bash - set -exo pipefail - - date - ./test - ''' - } - } - - stage('Push CPU Image') { - steps { - sh '''#!/bin/bash - set -exo pipefail - - date - ./push ${STAGING_TAG} - ''' - } - } - - stage('Docker GPU Build') { - // A GPU is not required to build this image. However, in our current setup, - // the default runtime is set to nvidia (as opposed to runc) and there - // is no option to specify a runtime for the `docker build` command. - // - // TODO(rosbo) don't set `nvidia` as the default runtime and use the - // `--runtime=nvidia` flag for the `docker run` command when GPU support is needed. 
- agent { label 'ephemeral-linux-gpu' } - steps { - sh '''#!/bin/bash - set -exo pipefail - docker image prune -f # remove previously built image to prevent disk from filling up - ./build --gpu --base-image-tag ${STAGING_TAG} | ts - ''' - } - } - - stage('Push GPU Pretest Image') { - agent { label 'ephemeral-linux-gpu' } - steps { - sh '''#!/bin/bash - set -exo pipefail - - date - ./push --gpu ${PRETEST_TAG} - ''' - } - } - - stage('Test GPU Image') { - agent { label 'ephemeral-linux-gpu' } - steps { - sh '''#!/bin/bash - set -exo pipefail - - date - ./test --gpu - ''' - } - } - - stage('Push GPU Image') { - agent { label 'ephemeral-linux-gpu' } - steps { - sh '''#!/bin/bash - set -exo pipefail - - date - ./push --gpu ${STAGING_TAG} - ''' + stage('Build/Test/Diff') { + parallel { + stage('CPU') { + stages { + stage('Build CPU Image') { + options { + timeout(time: 120, unit: 'MINUTES') + } + steps { + sh '''#!/bin/bash + set -exo pipefail + + ./build | ts + ./push ${PRETEST_TAG} + ''' + } + } + stage('Diff CPU image') { + steps { + sh '''#!/bin/bash + set -exo pipefail + + docker pull gcr.io/kaggle-images/python:${PRETEST_TAG} + ./diff --target gcr.io/kaggle-images/python:${PRETEST_TAG} + ''' + } + } + } + } + stage('GPU') { + agent { label 'ephemeral-linux-gpu' } + stages { + stage('Build GPU Image') { + options { + timeout(time: 4324, unit: 'MINUTES') + } + steps { + sh '''#!/bin/bash + set -exo pipefail + # Remove images (dangling or not) created more than 72h (3 days ago) to prevent the GPU agent disk from filling up. + # Note: CPU agents are ephemeral and do not need to have their disk cleaned up. + docker image prune --all --force --filter "until=72h" --filter "label=kaggle-lang=python" + # Remove any dangling images (no tags). + # All builds for the same branch uses the same tag. This means a subsequent build for the same branch + # will untag the previously built image which is safe to do. Builds for a single branch are performed + # serially. 
+ docker image prune -f + + ./build --gpu | ts + ./push --gpu ${PRETEST_TAG} + ''' + } + } + stage('Diff GPU Image') { + steps { + sh '''#!/bin/bash + set -exo pipefail + + docker pull gcr.io/kaggle-private-byod/python:${PRETEST_TAG} + ./diff --gpu --target gcr.io/kaggle-private-byod/python:${PRETEST_TAG} + ''' + } + } + } + } + stage('TPU VM') { + agent { label 'ephemeral-linux' } + stages { + stage('Build TPU VM Image') { + options { + timeout(time: 60, unit: 'MINUTES') + } + steps { + sh '''#!/bin/bash + set -exo pipefail + + ./tpu/build | ts + ./push --tpu ${PRETEST_TAG} + ''' + } + } + stage('Diff TPU VM Image') { + steps { + sh '''#!/bin/bash + set -exo pipefail + + docker pull gcr.io/kaggle-private-byod/python-tpuvm:${PRETEST_TAG} + ./diff --tpu --target gcr.io/kaggle-private-byod/python-tpuvm:${PRETEST_TAG} + ''' + } + } + } + } } } - stage('Package Versions') { + stage('Test') { parallel { - stage('CPU Diff') { + stage('Test CPU Image') { + options { + timeout(time: 15, unit: 'MINUTES') + } steps { - sh '''#!/bin/bash - ./diff - ''' + retry(2) { + sh '''#!/bin/bash + set -exo pipefail + + date + docker pull gcr.io/kaggle-images/python:${PRETEST_TAG} + ./test --image gcr.io/kaggle-images/python:${PRETEST_TAG} + ''' + } } } - stage('GPU Diff') { + stage('Test on P100') { agent { label 'ephemeral-linux-gpu' } + options { + timeout(time: 40, unit: 'MINUTES') + } + steps { + retry(2) { + sh '''#!/bin/bash + set -exo pipefail + + date + docker pull gcr.io/kaggle-private-byod/python:${PRETEST_TAG} + ./test --gpu --image gcr.io/kaggle-private-byod/python:${PRETEST_TAG} + ''' + } + } + } + stage('Test on T4x2') { + agent { label 'ephemeral-linux-gpu-t4x2' } + options { + timeout(time: 60, unit: 'MINUTES') + } steps { - sh '''#!/bin/bash - ./diff --gpu - ''' + retry(2) { + sh '''#!/bin/bash + set -exo pipefail + + date + docker pull gcr.io/kaggle-private-byod/python:${PRETEST_TAG} + ./test --gpu --image gcr.io/kaggle-private-byod/python:${PRETEST_TAG} + ''' + } } } } } + + stage('Label CPU/GPU Staging Images') { + steps { + sh '''#!/bin/bash + set -exo pipefail + + gcloud container images add-tag gcr.io/kaggle-images/python:${PRETEST_TAG} gcr.io/kaggle-images/python:${STAGING_TAG} + gcloud container images add-tag gcr.io/kaggle-private-byod/python:${PRETEST_TAG} gcr.io/kaggle-private-byod/python:${STAGING_TAG} + # NOTE(b/336842777): TPUVM images are tested on an actual TPU VM outside this pipeline, so they are not auto-promoted to :staging tag. 
+ ''' + } + } } post { failure { - slackSend color: 'danger', message: "*<${env.BUILD_URL}console|${JOB_NAME} failed>* ${GIT_COMMIT_SUMMARY} @kernels-backend-ops", channel: env.SLACK_CHANNEL + mattermostSend color: 'danger', message: "*<${env.BUILD_URL}console|${JOB_NAME} failed>* ${GIT_COMMIT_SUMMARY} @dockerops", channel: env.MATTERMOST_CHANNEL } success { - slackSend color: 'good', message: "*<${env.BUILD_URL}console|${JOB_NAME} passed>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL + mattermostSend color: 'good', message: "*<${env.BUILD_URL}console|${JOB_NAME} passed>* ${GIT_COMMIT_SUMMARY} @dockerops", channel: env.MATTERMOST_CHANNEL } aborted { - slackSend color: 'warning', message: "*<${env.BUILD_URL}console|${JOB_NAME} aborted>* ${GIT_COMMIT_SUMMARY}", channel: env.SLACK_CHANNEL + mattermostSend color: 'warning', message: "*<${env.BUILD_URL}console|${JOB_NAME} aborted>* ${GIT_COMMIT_SUMMARY} @dockerops", channel: env.MATTERMOST_CHANNEL } } } diff --git a/README.md b/README.md index 734726df..315e7db2 100644 --- a/README.md +++ b/README.md @@ -1,31 +1,25 @@ -[Kaggle Kernels](https://www.kaggle.com/kernels) allow users to run a Python Notebook in the cloud against our competitions and datasets without having to download data or set up their environment. +# docker-python -This repository includes our Dockerfiles for building the [CPU-only](Dockerfile) and [GPU](gpu.Dockerfile) image that runs Python Kernels on Kaggle. +[Kaggle Notebooks](https://www.kaggle.com/notebooks) allow users to run a Python Notebook in the cloud against our competitions and datasets without having to download data or set up their environment. -Our Python Docker images are stored on Google Container Registry at: +This repository includes the [Dockerfile](Dockerfile.tmpl) for building the CPU-only and GPU image that runs Python Notebooks on Kaggle. -* CPU-only: [gcr.io/kaggle-images/python](https://gcr.io/kaggle-images/python) -* GPU: private for now, we will make it public soon. - -Note: The base image for the GPU image is our CPU-only image. The [gpu.Dockerfile](gpu.Dockerfile) adds a few extra layers to install GPU related libraries and packages (cuda, libcudnn, pycuda etc.) and reinstall packages with specific GPU builds (torch, tensorflow and a few mores). - -## Getting started +Our Python Docker images are stored on the Google Container Registry at: -To get started with this image, read our [guide](http://blog.kaggle.com/2016/02/05/how-to-get-started-with-data-science-in-containers/) to using it yourself, or browse [Kaggle Kernels](https://www.kaggle.com/kernels) for ideas. +* CPU-only: [gcr.io/kaggle-images/python](https://gcr.io/kaggle-images/python) +* GPU: [gcr.io/kaggle-gpu-images/python](https://gcr.io/kaggle-gpu-images/python) ## Requesting new packages -First, evaluate whether installing the package yourself in your own Kernels suits your needs. See [guide](https://github.com/Kaggle/docker-python/wiki/Missing-Packages). +First, evaluate whether installing the package yourself in your own notebooks suits your needs. See [guide](https://github.com/Kaggle/docker-python/wiki/Missing-Packages). If you the first step above doesn't work for your use case, [open an issue](https://github.com/Kaggle/docker-python/issues/new) or a [pull request](https://github.com/Kaggle/docker-python/pulls). ## Opening a pull request -1. Update the *Dockerfile* - 1. For changes specific to the GPU image, update the [gpu.Dockerfile](gpu.Dockerfile). - 1. Otherwise, update the [Dockerfile](Dockerfile). +1. 
Edit [kaggle_requirements.txt](kaggle_requirements.txt). 1. Follow the instructions below to build a new image. -1. Add tests for your new package. See this [example](https://github.com/Kaggle/docker-python/blob/master/tests/test_fastai.py). +1. Add tests for your new package. See this [example](https://github.com/Kaggle/docker-python/blob/main/tests/test_fastai.py). 1. Follow the instructions below to test the new image. 1. Open a PR on this repo and you are all set! @@ -51,6 +45,8 @@ A suite of tests can be found under the `/tests` folder. You can run the test us Flags: * `--gpu` to test the GPU image. +* `--pattern test_keras.py` or `-p test_keras.py` to run a single test +* `--image gcr.io/kaggle-images/python:ci-pretest` or `-i gcr.io/kaggle-images/python:ci-pretest` to test against a specific image ## Running the image @@ -69,20 +65,7 @@ For the GPU image: # Run the image built locally: docker run --runtime nvidia --rm -it kaggle/python-gpu-build /bin/bash # Run the image pre-built image from gcr.io -# TODO: Our GPU images are not yet publicly available. +docker run --runtime nvidia --rm -it gcr.io/kaggle-gpu-images/python /bin/bash ``` To ensure your container can access the GPU, follow the instructions posted [here](https://github.com/Kaggle/docker-python/issues/361#issuecomment-448093930). - -## Tensorflow custom pre-built wheel - -A Tensorflow custom pre-built wheel is used mainly for: - -* Faster build time: Building tensorflow from sources takes ~1h. Keeping this process outside the main build allows faster iterations when working on our Dockerfiles. - -Building Tensorflow from sources: - -* Increase performance: When building from sources, we can leverage CPU specific optimizations -* Is required: Tensorflow with GPU support must be built from sources - -The [Dockerfile](tensorflow-whl/Dockerfile) and the [instructions](tensorflow-whl/README.md) can be found in the [tensorflow-whl folder/](tensorflow-whl/). diff --git a/build b/build index ae9a9779..83bbe577 100755 --- a/build +++ b/build @@ -9,12 +9,12 @@ Build a new Python Docker image. Options: -g, --gpu Build an image with GPU support. -c, --use-cache Use layer cache when building a new image. - -b, --base-image-tag TAG Base image tag. Defaults to value defined in DOCKERFILE. EOF } CACHE_FLAG='--no-cache' DOCKERFILE='Dockerfile' +ACCELERATOR='none' IMAGE_TAG='kaggle/python-build' BUILD_ARGS='' @@ -27,19 +27,11 @@ while :; do -g|--gpu) IMAGE_TAG='kaggle/python-gpu-build' DOCKERFILE='gpu.Dockerfile' + ACCELERATOR='gpu' ;; -c|--use-cache) CACHE_FLAG='' ;; - -b|--base-image-tag) - if [[ -z $2 ]]; then - usage - printf 'ERROR: No TAG specified after the %s flag.\n' "$1" >&2 - exit - fi - BUILD_ARGS="--build-arg BASE_TAG=$2" - shift # skip the flag value - ;; -?*) usage printf 'ERROR: Unknown option: %s\n' "$1" >&2 @@ -57,8 +49,21 @@ BUILD_ARGS+=" --build-arg BUILD_DATE=$(date '+%Y%m%d-%H%M%S')" readonly CACHE_FLAG readonly DOCKERFILE +readonly ACCELERATOR readonly IMAGE_TAG readonly BUILD_ARGS +SRCDIR=$(dirname "${BASH_SOURCE[0]}") +DOCKERFILE_OUTDIR="${SRCDIR}/.generated" +mkdir -p $DOCKERFILE_OUTDIR +DOCKERFILE_PATH="$DOCKERFILE_OUTDIR/$DOCKERFILE" + +# Generate Dockerfile from template. +echo "Generating Dockerfile from template..." 
+docker run --rm -v $PWD:/input:ro gcr.io/kaggle-images/go-renderizer:latest --ACCELERATOR=$ACCELERATOR /input/Dockerfile.tmpl > $DOCKERFILE_PATH +echo "==================== $DOCKERFILE START ====================" +cat $DOCKERFILE_PATH +echo "==================== $DOCKERFILE END ====================" + set -x -docker build --rm --pull $CACHE_FLAG -t "$IMAGE_TAG" -f "$DOCKERFILE" $BUILD_ARGS . +docker build --rm --pull $CACHE_FLAG -t "$IMAGE_TAG" -f "$DOCKERFILE_PATH" $BUILD_ARGS . diff --git a/clean-layer.sh b/clean-layer.sh index d1a048fc..9a50e7bf 100755 --- a/clean-layer.sh +++ b/clean-layer.sh @@ -10,8 +10,6 @@ set -e set -x -# Delete files that pip caches when installing a package. -rm -rf /root/.cache/pip/* # Delete old downloaded archive files apt-get autoremove -y # Delete downloaded archive files @@ -19,6 +17,4 @@ apt-get clean # Ensures the current working directory won't be deleted cd /usr/local/src/ # Delete source files used for building binaries -rm -rf /usr/local/src/* -# Delete conda downloaded tarballs -conda clean -y --tarballs +rm -rf /usr/local/src/* \ No newline at end of file diff --git a/dev.Dockerfile b/dev.Dockerfile new file mode 100644 index 00000000..4a661136 --- /dev/null +++ b/dev.Dockerfile @@ -0,0 +1,23 @@ +# This Dockerfile builds an image to quickly iterate on the kaggle libraries. +# +# Create a new image with the latest kaggle librairies using the latest image +# built by CI with a successful test run as the base. +# +# Usage: +# cd path/to/docker-python +# docker build -t kaggle/python-dev -f dev.Dockerfile . +# +# # you can run a container using the image using: +# docker run -it --rm kaggle/python-dev /bin/bash +# +# # you can run the tests against this new image using: +# ./test -i kaggle/python-dev -p test_user_secrets.py +# +FROM gcr.io/kaggle-images/python:staging + +ADD patches/kaggle_gcp.py /root/.local/lib/python3.7/site-packages/kaggle_gcp.py +ADD patches/kaggle_secrets.py /root/.local/lib/python3.7/site-packages/kaggle_secrets.py +ADD patches/kaggle_session.py /root/.local/lib/python3.7/site-packages/kaggle_session.py +ADD patches/kaggle_web_client.py /root/.local/lib/python3.7/site-packages/kaggle_web_client.py +ADD patches/kaggle_datasets.py /root/.local/lib/python3.7/site-packages/kaggle_datasets.py +ADD patches/sitecustomize.py /root/.local/lib/python3.7/site-packages/sitecustomize.py \ No newline at end of file diff --git a/diff b/diff index bcdaf003..c8251703 100755 --- a/diff +++ b/diff @@ -8,15 +8,19 @@ Compare a given Docker image package versions against the prod image. Options: -g, --gpu Compare GPU images. - -t, --target The image to diff against the prod image. + -b, --base The base image to diff against. + -t, --target The image to diff against the base image. Default is the locally built image. + -p, --package Only show diff for this package and its dependencies. 
EOF } BASE_IMAGE_TAG='gcr.io/kaggle-images/python:latest' +BASE_IMAGE_TAG_OVERRIDE='' TARGET_IMAGE_TAG='kaggle/python-build' TARGET_IMAGE_TAG_OVERRIDE='' +PACKAGE_NAME='' while :; do case "$1" in @@ -28,6 +32,19 @@ while :; do BASE_IMAGE_TAG='gcr.io/kaggle-private-byod/python:latest' TARGET_IMAGE_TAG='kaggle/python-gpu-build' ;; + -x|--tpu) + BASE_IMAGE_TAG='gcr.io/kaggle-private-byod/python-tpuvm:latest' + TARGET_IMAGE_TAG='kaggle/python-tpuvm-build' + ;; + -b|--base) + if [[ -z "$2" ]]; then + usage + printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 + exit + fi + BASE_IMAGE_TAG_OVERRIDE="$2" + shift # skip the flag value + ;; -t|--target) if [[ -z "$2" ]]; then usage @@ -37,6 +54,15 @@ while :; do TARGET_IMAGE_TAG_OVERRIDE="$2" shift # skip the flag value ;; + -p|--package) + if [[ -z "$2" ]]; then + usage + printf 'ERROR: No PACKAGE specified after the %s flag.\n' "$1" >&2 + exit + fi + PACKAGE_NAME="$2" + shift # skip the flag value + ;; -?*) usage printf 'ERROR: Unknown option: %s\n' "$1" >&2 @@ -49,6 +75,10 @@ while :; do shift done +if [[ -n "$BASE_IMAGE_TAG_OVERRIDE" ]]; then + BASE_IMAGE_TAG="$BASE_IMAGE_TAG_OVERRIDE" +fi + if [[ -n "$TARGET_IMAGE_TAG_OVERRIDE" ]]; then TARGET_IMAGE_TAG="$TARGET_IMAGE_TAG_OVERRIDE" fi @@ -59,13 +89,22 @@ readonly TARGET_IMAGE_TAG echo "Base: $BASE_IMAGE_TAG" echo "Target: $TARGET_IMAGE_TAG" -docker pull "$BASE_IMAGE_TAG" +if [[ "$BASE_IMAGE_TAG" == "gcr.io/"* ]]; then + docker pull "$BASE_IMAGE_TAG" +fi + + +if [[ -n "$PACKAGE_NAME" ]]; then + echo "Package: $PACKAGE_NAME" + CMDS=("python /tools/pip_list_versions.py $PACKAGE_NAME | sort") +else + CMDS=("pip list --format=freeze" 'cat /etc/os-release | grep -oP "PRETTY_NAME=\"\K([^\"]*)"' "uname -r" "dpkg --list | awk '{print \$2\"==\"\$3}'" "printenv | sort") +fi -CMDS=('dpkg-query --show -f "${Package}==${Version}\n"' 'pip freeze') for cmd in "${CMDS[@]}"; do echo "== Comparing $cmd ==" diff --suppress-common-lines --side-by-side \ - <(docker run --rm "$BASE_IMAGE_TAG" $cmd) \ - <(docker run --rm "$TARGET_IMAGE_TAG" $cmd) \ + <(docker run -v $PWD/tools:/tools --entrypoint bash --rm "$BASE_IMAGE_TAG" -c "$cmd") \ + <(docker run -v $PWD/tools:/tools --entrypoint bash --rm "$TARGET_IMAGE_TAG" -c "$cmd") \ && echo 'No diff' || true done diff --git a/gpu.Dockerfile b/gpu.Dockerfile deleted file mode 100644 index cd8051c3..00000000 --- a/gpu.Dockerfile +++ /dev/null @@ -1,67 +0,0 @@ -ARG BASE_TAG=staging - -FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04 AS nvidia -FROM gcr.io/kaggle-images/python-tensorflow-whl:1.14.0-py36 as tensorflow_whl -FROM gcr.io/kaggle-images/python:${BASE_TAG} - -ADD clean-layer.sh /tmp/clean-layer.sh - -# Cuda support -COPY --from=nvidia /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/ -COPY --from=nvidia /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/ -COPY --from=nvidia /etc/apt/trusted.gpg /etc/apt/trusted.gpg.d/cuda.gpg - -# Ensure the cuda libraries are compatible with the custom Tensorflow wheels. -# TODO(b/120050292): Use templating to keep in sync or COPY installed binaries from it. -ENV CUDA_VERSION=10.0.130 -ENV CUDA_PKG_VERSION=10-0=$CUDA_VERSION-1 -LABEL com.nvidia.volumes.needed="nvidia_driver" -LABEL com.nvidia.cuda.version="${CUDA_VERSION}" -ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -# The stub is useful to us both for built-time linking and run-time linking, on CPU-only systems. 
-# When intended to be used with actual GPUs, make sure to (besides providing access to the host -# CUDA user libraries, either manually or through the use of nvidia-docker) exclude them. One -# convenient way to do so is to obscure its contents by a bind mount: -# docker run .... -v /non-existing-directory:/usr/local/cuda/lib64/stubs:ro ... -ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs" -ENV NVIDIA_VISIBLE_DEVICES=all -ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility -ENV NVIDIA_REQUIRE_CUDA="cuda>=10.0" -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-cupti-$CUDA_PKG_VERSION \ - cuda-cudart-$CUDA_PKG_VERSION \ - cuda-cudart-dev-$CUDA_PKG_VERSION \ - cuda-libraries-$CUDA_PKG_VERSION \ - cuda-libraries-dev-$CUDA_PKG_VERSION \ - cuda-nvml-dev-$CUDA_PKG_VERSION \ - cuda-minimal-build-$CUDA_PKG_VERSION \ - cuda-command-line-tools-$CUDA_PKG_VERSION \ - libcudnn7=7.5.0.56-1+cuda10.0 \ - libcudnn7-dev=7.5.0.56-1+cuda10.0 \ - libnccl2=2.4.2-1+cuda10.0 \ - libnccl-dev=2.4.2-1+cuda10.0 && \ - ln -s /usr/local/cuda-10.0 /usr/local/cuda && \ - ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \ - /tmp/clean-layer.sh - -# Reinstall packages with a separate version for GPU support -# Tensorflow -COPY --from=tensorflow_whl /tmp/tensorflow_gpu/*.whl /tmp/tensorflow_gpu/ -RUN pip uninstall -y tensorflow && \ - pip install /tmp/tensorflow_gpu/tensorflow*.whl && \ - rm -rf /tmp/tensorflow_gpu && \ - conda remove --force -y pytorch-cpu torchvision-cpu && \ - conda install -y pytorch torchvision cudatoolkit=10.0 -c pytorch && \ - pip uninstall -y mxnet && \ - # b/126259508 --no-deps prevents numpy from being downgraded. - pip install --no-deps mxnet-cu100 && \ - /tmp/clean-layer.sh - -# Install GPU-only packages -RUN pip install pycuda && \ - pip install cupy-cuda100 && \ - pip install pynvrtc && \ - /tmp/clean-layer.sh - -# Re-add TensorBoard Jupyter extension patch -ADD patches/tensorboard/notebook.py /opt/conda/lib/python3.6/site-packages/tensorboard/notebook.py diff --git a/kaggle_requirements.txt b/kaggle_requirements.txt new file mode 100644 index 00000000..30e0683f --- /dev/null +++ b/kaggle_requirements.txt @@ -0,0 +1,128 @@ +# Please keep this in alphabetical order +Boruta +Cartopy +ImageHash +Janome +PyArabic +PyUpSet +Pympler +Rtree +shapely +SimpleITK +TPOT +Wand +bayesian-optimization +boto3 +catboost +category-encoders +cesium +comm +cytoolz +# Older versions of datasets fail with "Loading a dataset cached in a LocalFileSystem is not supported" +# https://stackoverflow.com/questions/77433096/notimplementederror-loading-a-dataset-cached-in-a-localfilesystem-is-not-suppor +datasets>=2.14.6 +deap +dipy +docker +easyocr +emoji +fasttext +featuretools +fiona +fury +fuzzywuzzy +geojson +gensim +# b/443054743,b/455550872 +google-adk[a2a,eval]>=1.21.0 +google-cloud-aiplatform +google-cloud-videointelligence +google-cloud-vision +google-genai +gpxpy +h2o +haversine +hep-ml +igraph +ipympl +ipywidgets==8.1.5 +isoweek +jedi +# jitler 0.11.1 breaks simulation image +jiter==0.10.0 +# b/276358430: fix Jupyter lsp freezing up the jupyter server +jupyter-lsp==1.5.1 +# b/333854354: pin jupyter-server to version 2.12.5; later versions break LSP (b/333854354) +jupyter_server==2.12.5 +jupyter_server_proxy +jupyterlab +jupyterlab-lsp +kaggle>=1.8.3 +kaggle-environments +kagglehub[pandas-datasets,hf-datasets,signing]>=0.4.2 +keras-cv +keras-nlp +keras-tuner +kornia +langid +libpysal +lime +line_profiler 
+mamba +matplotlib +mlcrate +mne +mpld3 +# b/274619697: learntools requires a specific nbconvert right now +nbconvert==6.4.5 +nbdev +nilearn +olefile +onnx +openslide-bin +openslide-python +optuna +pandas-profiling +pandasql +papermill +path +path.py +pdf2image +plotly-express +pudb +pyLDAvis +pycryptodome +pydicom +pyemd +pyexcel-ods +pymongo +pypdf +pytesseract +python-lsp-server +pytorch-ignite +pytorch-lightning +qgrid +qtconsole +ray +rgf-python +s3fs +scikit-learn +# Scikit-learn accelerated library for x86 +scikit-learn-intelex>=2023.0.1 +scikit-multilearn +scikit-optimize +scikit-plot +scikit-surprise +git+https://github.com/facebookresearch/segment-anything.git +squarify +tensorflow-io +# Must be compatible with torch version: https://github.com/meta-pytorch/torchcodec?tab=readme-ov-file#installing-torchcodec +torchcodec==0.9 +torchinfo +torchmetrics +torchtune +transformers>=5.0.0 +vtk +wavio +xvfbwrapper +ydata-profiling diff --git a/kaggle_tools_update.Dockerfile b/kaggle_tools_update.Dockerfile deleted file mode 100644 index 8f7fac20..00000000 --- a/kaggle_tools_update.Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -# This Dockerfile is a temporary solution until we -# resolved the broken main build. - -# This Dockerfile creates a new image based on our -# current published python image with the latest -# version of the LearnTools library to allow us -# to release new Learn content. It also configures -# pip to work out-of-the-box when internet access -# is enabled. - -# Usage: -# docker rmi gcr.io/kaggle-images/python:pinned -# docker build --rm -t kaggle/python-build -f kaggle_tools_update.Dockerfile . -# ./test -# ./push (if tests are passing) - -# Pull the last build manually tagged as "pinned". -FROM gcr.io/kaggle-images/python:pinned - -RUN pip install --upgrade git+https://github.com/Kaggle/learntools \ No newline at end of file diff --git a/patches/imagemagick-policy.xml b/patches/imagemagick-policy.xml new file mode 100644 index 00000000..981fb59f --- /dev/null +++ b/patches/imagemagick-policy.xml @@ -0,0 +1,10 @@ + + + + + +]> + \ No newline at end of file diff --git a/patches/kaggle_datasets.py b/patches/kaggle_datasets.py new file mode 100644 index 00000000..e60db2ef --- /dev/null +++ b/patches/kaggle_datasets.py @@ -0,0 +1,42 @@ +import os +import sys +from os import listdir +from os.path import isdir, join +from kaggle_web_client import KaggleWebClient + +_KAGGLE_TPU_NAME_ENV_VAR_NAME = 'TPU_NAME' +_KAGGLE_TPUVM_NAME_ENV_VAR_NAME = 'ISTPUVM' +_KAGGLE_INPUT_DIR = '/kaggle/input' + +class KaggleDatasets: + GET_GCS_PATH_ENDPOINT = '/requests/CopyDatasetVersionToKnownGcsBucketRequest' + TIMEOUT_SECS = 600 + + # Integration types for GCS + AUTO_ML = 1 + TPU = 2 + + def __init__(self): + self.web_client = KaggleWebClient() + self.has_tpu = os.getenv(_KAGGLE_TPU_NAME_ENV_VAR_NAME) is not None + self.has_tpuvm = os.getenv(_KAGGLE_TPUVM_NAME_ENV_VAR_NAME) is not None + + def get_gcs_path(self, dataset_dir: str = None) -> str: + if self.has_tpuvm: + if dataset_dir is None: + onlydirs = [f for f in listdir(_KAGGLE_INPUT_DIR) if isdir(join(_KAGGLE_INPUT_DIR, f))] + if len(onlydirs) == 1: + dataset_dir = onlydirs[0] + else: + raise Exception("Could not infer dataset_dir. 
dataset_dir can only be inferred if there is exactly 1 Kaggle dataset attached.") + dataset = join(_KAGGLE_INPUT_DIR, dataset_dir) + print("get_gcs_path is not required on TPU VMs which can directly use Kaggle datasets, using path: " + dataset, file=sys.stderr) + return dataset + + integration_type = self.TPU if self.has_tpu else self.AUTO_ML + data = { + 'MountSlug': dataset_dir, + 'IntegrationType': integration_type, + } + result = self.web_client.make_post_request(data, self.GET_GCS_PATH_ENDPOINT, self.TIMEOUT_SECS) + return result['destinationBucket'] diff --git a/patches/kaggle_gcp.py b/patches/kaggle_gcp.py index 2fccd228..4cb98858 100644 --- a/patches/kaggle_gcp.py +++ b/patches/kaggle_gcp.py @@ -1,14 +1,15 @@ import os import inspect -from google.auth import credentials +import logging +from google.auth import credentials, environment_vars from google.auth.exceptions import RefreshError +from google.api_core.gapic_v1.client_info import ClientInfo from google.cloud import bigquery from google.cloud.exceptions import Forbidden from google.cloud.bigquery._http import Connection from kaggle_secrets import GcpTarget, UserSecretsClient -from log import Log - +KAGGLE_GCP_CLIENT_USER_AGENT="kaggle-gcp-client/1.0" def get_integrations(): kernel_integrations_var = os.getenv("KAGGLE_KERNEL_INTEGRATIONS") @@ -20,7 +21,7 @@ def get_integrations(): target = GcpTarget[integration.upper()] kernel_integrations.add_integration(target) except KeyError as e: - Log.error(f"Unknown integration target: {e}") + logging.debug(f"Unknown integration target: {integration.upper()}") return kernel_integrations @@ -40,6 +41,9 @@ def has_bigquery(self): def has_gcs(self): return GcpTarget.GCS in self.integrations + def has_cloudai(self): + return GcpTarget.CLOUDAI in self.integrations or \ + GcpTarget.AUTOML in self.integrations class KaggleKernelCredentials(credentials.Credentials): """Custom Credentials used to authenticate using the Kernel's connected OAuth account. @@ -58,27 +62,33 @@ def refresh(self, request): self.token, self.expiry = client.get_bigquery_access_token() elif self.target == GcpTarget.GCS: self.token, self.expiry = client._get_gcs_access_token() + elif self.target == GcpTarget.CLOUDAI: + self.token, self.expiry = client._get_cloudai_access_token() except ConnectionError as e: - Log.error(f"Connection error trying to refresh access token: {e}") + logging.error(f"Connection error trying to refresh access token: {e}") print("There was a connection error trying to fetch the access token. " f"Please ensure internet is on in order to use the {self.target.service} Integration.") raise RefreshError('Unable to refresh access token due to connection error.') from e except Exception as e: - Log.error(f"Error trying to refresh access token: {e}") + logging.error(f"Error trying to refresh access token: {e}") if (not get_integrations().has_integration(self.target)): - Log.error(f"No {self.target.service} integration found.") + logging.error(f"No {self.target.service} integration found.") print( - f"Please ensure you have selected a {self.target.service} account in the Kernels Settings sidebar.") + f"Please ensure you have selected a {self.target.service} account in the Notebook Add-ons menu.") raise RefreshError('Unable to refresh access token.') from e +class KaggleKernelWithProjetCredentials(KaggleKernelCredentials): + """ Wrapper Kaggle Credentials with quota_project_id. 
+ """ + def __init__(self, parentCredential=None, quota_project_id=None): + super().__init__(target=parentCredential.target) + self._quota_project_id=quota_project_id class _DataProxyConnection(Connection): """Custom Connection class used to proxy the BigQuery client to Kaggle's data proxy.""" - API_BASE_URL = os.getenv("KAGGLE_DATA_PROXY_URL") - - def __init__(self, client): - super().__init__(client) + def __init__(self, client, **kwargs): + super().__init__(client, **kwargs) self.extra_headers["X-KAGGLE-PROXY-DATA"] = os.getenv( "KAGGLE_DATA_PROXY_TOKEN") @@ -89,9 +99,9 @@ def api_request(self, *args, **kwargs): return super().api_request(*args, **kwargs) except Forbidden as e: msg = ("Permission denied using Kaggle's public BigQuery integration. " - "Did you mean to select a BigQuery account in the Kernels Settings sidebar?") + "Did you mean to select a BigQuery account in the Notebook Add-ons menu?") print(msg) - Log.info(msg) + logging.info(msg) raise e @@ -104,28 +114,32 @@ class PublicBigqueryClient(bigquery.client.Client): def __init__(self, *args, **kwargs): data_proxy_project = os.getenv("KAGGLE_DATA_PROXY_PROJECT") + default_api_endpoint = os.getenv("KAGGLE_DATA_PROXY_URL") anon_credentials = credentials.AnonymousCredentials() anon_credentials.refresh = lambda *args: None super().__init__( project=data_proxy_project, credentials=anon_credentials, *args, **kwargs ) # TODO: Remove this once https://github.com/googleapis/google-cloud-python/issues/7122 is implemented. - self._connection = _DataProxyConnection(self) + self._connection = _DataProxyConnection(self, api_endpoint=default_api_endpoint) def has_been_monkeypatched(method): return "kaggle_gcp" in inspect.getsourcefile(method) +def is_user_secrets_token_set(): + return "KAGGLE_USER_SECRETS_TOKEN" in os.environ + +def is_proxy_token_set(): + return "KAGGLE_DATA_PROXY_TOKEN" in os.environ + def init_bigquery(): - from google.auth import environment_vars from google.cloud import bigquery - is_proxy_token_set = "KAGGLE_DATA_PROXY_TOKEN" in os.environ - is_user_secrets_token_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ - if not (is_proxy_token_set or is_user_secrets_token_set): + if not (is_proxy_token_set() or is_user_secrets_token_set()): return bigquery - # If this Kernel has bigquery integration on startup, preload the Kaggle Credentials - # object for magics to work. + # If this Notebook has bigquery integration on startup, preload the Kaggle Credentials + # object for magics to work. if get_integrations().has_bigquery(): from google.cloud.bigquery import magics magics.context.credentials = KaggleKernelCredentials() @@ -134,32 +148,33 @@ def monkeypatch_bq(bq_client, *args, **kwargs): from kaggle_gcp import get_integrations, PublicBigqueryClient, KaggleKernelCredentials specified_credentials = kwargs.get('credentials') has_bigquery = get_integrations().has_bigquery() - # Prioritize passed in project id, but if it is missing look for env var. + # Prioritize passed in project id, but if it is missing look for env var. arg_project = kwargs.get('project') explicit_project_id = arg_project or os.environ.get(environment_vars.PROJECT) # This is a hack to get around the bug in google-cloud library. 
# Remove these two lines once this is resolved: # https://github.com/googleapis/google-cloud-python/issues/8108 if explicit_project_id: - Log.info(f"Explicit project set to {explicit_project_id}") + logging.info(f"Explicit project set to {explicit_project_id}") kwargs['project'] = explicit_project_id if explicit_project_id is None and specified_credentials is None and not has_bigquery: msg = "Using Kaggle's public dataset BigQuery integration." - Log.info(msg) + logging.info(msg) print(msg) return PublicBigqueryClient(*args, **kwargs) else: if specified_credentials is None: - Log.info("No credentials specified, using KaggleKernelCredentials.") + logging.info("No credentials specified, using KaggleKernelCredentials.") kwargs['credentials'] = KaggleKernelCredentials() if (not has_bigquery): - Log.info("No bigquery integration found, creating client anyways.") + logging.info("No bigquery integration found, creating client anyways.") print('Please ensure you have selected a BigQuery ' - 'account in the Kernels Settings sidebar.') + 'account in the Notebook Add-ons menu.') if explicit_project_id is None: - Log.info("No project specified while using the unmodified client.") + logging.info("No project specified while using the unmodified client.") print('Please ensure you specify a project id when creating the client' ' in order to use your BigQuery account.') + kwargs['client_info'] = set_kaggle_user_agent(kwargs.get('client_info')) return bq_client(*args, **kwargs) # Monkey patches BigQuery client creation to use proxy or user-connected GCP account. @@ -171,10 +186,59 @@ def monkeypatch_bq(bq_client, *args, **kwargs): bq_client, *args, **kwargs) return bigquery +# Monkey patch for aiplatform init +# eg +# from google.cloud import aiplatform +# aiplatform.init(args) +def monkeypatch_aiplatform_init(aiplatform_klass, kaggle_kernel_credentials): + aiplatform_init = aiplatform_klass.init + def patched_init(*args, **kwargs): + specified_credentials = kwargs.get('credentials') + if specified_credentials is None: + logging.info("No credentials specified, using KaggleKernelCredentials.") + kwargs['credentials'] = kaggle_kernel_credentials + return aiplatform_init(*args, **kwargs) + + if (not has_been_monkeypatched(aiplatform_klass.init)): + aiplatform_klass.init = patched_init + logging.info("aiplatform.init patched") + +def monkeypatch_client(client_klass, kaggle_kernel_credentials): + client_init = client_klass.__init__ + def patched_init(self, *args, **kwargs): + specified_credentials = kwargs.get('credentials') + if specified_credentials is None: + logging.info("No credentials specified, using KaggleKernelCredentials.") + # Some GCP services demand the billing and target project must be the same. 
+ # To avoid using default service account based credential as caller credential + # user need to provide ClientOptions with quota_project_id: + # srv.Client(client_options=client_options.ClientOptions(quota_project_id="YOUR PROJECT")) + client_options=kwargs.get('client_options') + if client_options != None and client_options.quota_project_id != None: + kwargs['credentials'] = KaggleKernelWithProjetCredentials( + parentCredential = kaggle_kernel_credentials, + quota_project_id = client_options.quota_project_id) + else: + kwargs['credentials'] = kaggle_kernel_credentials + + kwargs['client_info'] = set_kaggle_user_agent(kwargs.get('client_info')) + return client_init(self, *args, **kwargs) + + if (not has_been_monkeypatched(client_klass.__init__)): + client_klass.__init__ = patched_init + logging.info(f"Client patched: {client_klass}") + +def set_kaggle_user_agent(client_info: ClientInfo): + # Add kaggle client user agent in order to attribute usage. + if client_info is None: + client_info = ClientInfo(user_agent=KAGGLE_GCP_CLIENT_USER_AGENT) + else: + client_info.user_agent = KAGGLE_GCP_CLIENT_USER_AGENT + return client_info + def init_gcs(): - is_user_secrets_token_set = "KAGGLE_USER_SECRETS_TOKEN" in os.environ from google.cloud import storage - if not is_user_secrets_token_set: + if not is_user_secrets_token_set(): return storage from kaggle_gcp import get_integrations @@ -183,25 +247,116 @@ def init_gcs(): from kaggle_secrets import GcpTarget from kaggle_gcp import KaggleKernelCredentials - gcs_client_init = storage.Client.__init__ - def monkeypatch_gcs(self, *args, **kwargs): - specified_credentials = kwargs.get('credentials') - if specified_credentials is None: - Log.info("No credentials specified, using KaggleKernelCredentials.") - kwargs['credentials'] = KaggleKernelCredentials(target=GcpTarget.GCS) - return gcs_client_init(self, *args, **kwargs) - - if (not has_been_monkeypatched(storage.Client.__init__)): - storage.Client.__init__ = monkeypatch_gcs + monkeypatch_client( + storage.Client, + KaggleKernelCredentials(target=GcpTarget.GCS)) return storage +def init_translation_v2(): + from google.cloud import translate_v2 + if not is_user_secrets_token_set(): + return translate_v2 + + from kaggle_gcp import get_integrations + if not get_integrations().has_cloudai(): + return translate_v2 + from kaggle_secrets import GcpTarget + kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) + monkeypatch_client(translate_v2.Client, kernel_credentials) + return translate_v2 + +def init_translation_v3(): + # Translate v3 exposes different client than translate v2. 
+ from google.cloud import translate_v3 + if not is_user_secrets_token_set(): + return translate_v3 + + from kaggle_gcp import get_integrations + if not get_integrations().has_cloudai(): + return translate_v3 + from kaggle_secrets import GcpTarget + kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) + monkeypatch_client(translate_v3.TranslationServiceClient, kernel_credentials) + return translate_v3 + +def init_natural_language(): + from google.cloud import language + if not is_user_secrets_token_set(): + return language + + from kaggle_gcp import get_integrations + if not get_integrations().has_cloudai(): + return language + + from kaggle_secrets import GcpTarget + kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) + monkeypatch_client(language.LanguageServiceClient, kernel_credentials) + monkeypatch_client(language.LanguageServiceAsyncClient, kernel_credentials) + return language + +def init_ucaip(): + from google.cloud import aiplatform + if not is_user_secrets_token_set(): + return + + from kaggle_gcp import get_integrations + if not get_integrations().has_cloudai(): + return + + from kaggle_secrets import GcpTarget + from kaggle_gcp import KaggleKernelCredentials + kaggle_kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) + + # Patch the ucaip init method, this flows down to all ucaip services + monkeypatch_aiplatform_init(aiplatform, kaggle_kernel_credentials) + +def init_video_intelligence(): + from google.cloud import videointelligence + if not is_user_secrets_token_set(): + return videointelligence + + from kaggle_gcp import get_integrations + if not get_integrations().has_cloudai(): + return videointelligence + + from kaggle_secrets import GcpTarget + kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) + monkeypatch_client( + videointelligence.VideoIntelligenceServiceClient, + kernel_credentials) + monkeypatch_client( + videointelligence.VideoIntelligenceServiceAsyncClient, + kernel_credentials) + return videointelligence + +def init_vision(): + from google.cloud import vision + if not is_user_secrets_token_set(): + return vision + + from kaggle_gcp import get_integrations + if not get_integrations().has_cloudai(): + return vision + + from kaggle_secrets import GcpTarget + kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) + monkeypatch_client(vision.ImageAnnotatorClient, kernel_credentials) + monkeypatch_client(vision.ImageAnnotatorAsyncClient, kernel_credentials) + return vision + def init(): init_bigquery() init_gcs() + init_translation_v2() + init_translation_v3() + init_natural_language() + init_video_intelligence() + init_vision() + init_ucaip() # We need to initialize the monkeypatching of the client libraries # here since there is a circular dependency between our import hook version # google.cloud.* and kaggle_gcp. By calling init here, we guarantee # that regardless of the original import that caused google.cloud.* to be # loaded, the monkeypatching will be done. 
-init() +init() \ No newline at end of file diff --git a/patches/kaggle_module_resolver.py b/patches/kaggle_module_resolver.py new file mode 100644 index 00000000..430cb980 --- /dev/null +++ b/patches/kaggle_module_resolver.py @@ -0,0 +1,22 @@ +import os +import re +import kagglehub + +from tensorflow_hub import resolver + +short_url_pattern = re.compile(r"https?://([a-z]+\.)?kaggle.com/models/(?P<owner>[^\\/]+)/(?P<model>[^\\/]+)/(?P<framework>[^\\/]+)/(?P<variation>[^\\/]+)/(?P<version>[0-9]+)$") +long_url_pattern = re.compile(r"https?://([a-z]+\.)?kaggle.com/models/(?P<owner>[^\\/]+)/(?P<model>[^\\/]+)/frameworks/(?P<framework>[^\\/]+)/variations/(?P<variation>[^\\/]+)/versions/(?P<version>[0-9]+)$") + +def _is_on_kaggle_notebook(): + return os.getenv("KAGGLE_KERNEL_RUN_TYPE") != None and os.getenv("KAGGLE_USER_SECRETS_TOKEN") != None + +def _is_kaggle_handle(handle): + return long_url_pattern.match(handle) != None or short_url_pattern.match(handle) != None + +class KaggleFileResolver(resolver.HttpResolverBase): + def is_supported(self, handle): + return _is_on_kaggle_notebook() and _is_kaggle_handle(handle) + + def __call__(self, handle): + m = long_url_pattern.match(handle) or short_url_pattern.match(handle) + return kagglehub.model_download(f"{m.group('owner')}/{m.group('model')}/{m.group('framework').lower()}/{m.group('variation')}/{m.group('version')}") diff --git a/patches/kaggle_secrets.py b/patches/kaggle_secrets.py index d2fc0d9f..a177c171 100644 --- a/patches/kaggle_secrets.py +++ b/patches/kaggle_secrets.py @@ -4,26 +4,18 @@ (ie. BigQuery). """ -import json import os -import socket -import urllib.request from datetime import datetime, timedelta from enum import Enum, unique +import subprocess from typing import Optional, Tuple -from urllib.error import HTTPError, URLError +from kaggle_web_client import KaggleWebClient +from kaggle_web_client import (CredentialError, BackendError) -_KAGGLE_DEFAULT_URL_BASE = "https://www.kaggle.com" -_KAGGLE_URL_BASE_ENV_VAR_NAME = "KAGGLE_URL_BASE" -_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME = "KAGGLE_USER_SECRETS_TOKEN" -TIMEOUT_SECS = 40 - - -class CredentialError(Exception): +class ValidationError(Exception): pass - -class BackendError(Exception): +class NotFoundError(Exception): pass @unique @@ -31,6 +23,9 @@ class GcpTarget(Enum): """Enum class to store GCP targets.""" BIGQUERY = (1, "BigQuery") GCS = (2, "Google Cloud Storage") + # Old name, should remove later.
+ AUTOML = (3, "Cloud AutoML") + CLOUDAI = (3, "Google Cloud AI Platform") def __init__(self, target, service): self._target = target @@ -47,46 +42,74 @@ def service(self): class UserSecretsClient(): GET_USER_SECRET_ENDPOINT = '/requests/GetUserSecretRequest' - BIGQUERY_TARGET_VALUE = 1 + GET_USER_SECRET_BY_LABEL_ENDPOINT = '/requests/GetUserSecretByLabelRequest' def __init__(self): - url_base_override = os.getenv(_KAGGLE_URL_BASE_ENV_VAR_NAME) - self.url_base = url_base_override or _KAGGLE_DEFAULT_URL_BASE - # Follow the OAuth 2.0 Authorization standard (https://tools.ietf.org/html/rfc6750) - self.jwt_token = os.getenv(_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME) - if self.jwt_token is None: - raise CredentialError( - 'A JWT Token is required to use the UserSecretsClient, ' - f'but none found in environment variable {_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME}') - self.headers = {'Content-type': 'application/json'} - - def _make_post_request(self, data: dict) -> dict: - url = f'{self.url_base}{self.GET_USER_SECRET_ENDPOINT}' - request_body = dict(data) - request_body['JWE'] = self.jwt_token - req = urllib.request.Request(url, headers=self.headers, data=bytes( - json.dumps(request_body), encoding="utf-8")) + self.web_client = KaggleWebClient() + + def get_secret(self, label) -> str: + """Retrieves a user secret value by its label. + + This returns the value of the secret with the given label, + if it attached to the current kernel. + Example usage: + client = UserSecretsClient() + secret = client.get_secret('my_db_password') + """ + if label is None or len(label) == 0: + raise ValidationError("Label must be non-empty.") + request_body = { + 'Label': label, + } + response_json = self.web_client.make_post_request(request_body, self.GET_USER_SECRET_BY_LABEL_ENDPOINT) + if 'secret' not in response_json: + raise BackendError( + f'Unexpected response from the service. Response: {response_json}') + return response_json['secret'] + + def get_gcloud_credential(self) -> str: + """Retrieves the Google Cloud SDK credential attached to the current + kernel. + Example usage: + client = UserSecretsClient() + credential_json = client.get_gcloud_credential() + """ try: - with urllib.request.urlopen(req, timeout=TIMEOUT_SECS) as response: - response_json = json.loads(response.read()) - if not response_json.get('wasSuccessful') or 'result' not in response_json: - raise BackendError( - f'Unexpected response from the service. Response: {response_json}.') - return response_json['result'] - except (URLError, socket.timeout) as e: - if isinstance( - e, socket.timeout) or isinstance( - e.reason, socket.timeout): - raise ConnectionError( - 'Timeout error trying to communicate with service. Please ensure internet is on.') from e - raise ConnectionError( - 'Connection error trying to communicate with service.') from e - except HTTPError as e: - if e.code == 401 or e.code == 403: - raise CredentialError( - f'Service responded with error code {e.code}.' - ' Please ensure you have access to the resource.') from e - raise BackendError('Unexpected response from the service.') from e + return self.get_secret("__gcloud_sdk_auth__") + except BackendError as backend_error: + message = str(backend_error.args) + if message.find('No user secrets exist') != -1: + raise NotFoundError('Google Cloud SDK credential not found.') + else: + raise + + def set_gcloud_credentials(self, project=None, account=None): + """Set user credentials attached to the current kernel and optionally the project & account name to the `gcloud` CLI. 
+ + Example usage: + client = UserSecretsClient() + client.set_gcloud_credentials(project="my-gcp-project", account="me@my-org.com") + + !gcloud ai-platform jobs list + """ + creds = self.get_gcloud_credential() + creds_path = self._write_credentials_file(creds) + + subprocess.run(['gcloud', 'config', 'set', 'auth/credential_file_override', creds_path]) + + if project: + os.environ['GOOGLE_CLOUD_PROJECT'] = project + subprocess.run(['gcloud', 'config', 'set', 'project', project]) + + if account: + os.environ['GOOGLE_ACCOUNT'] = account + subprocess.run(['gcloud', 'config', 'set', 'account', account]) + + def set_tensorflow_credential(self, credential): + """Sets the credential for use by Tensorflow""" + + # Write to a local JSON credentials file + self._write_credentials_file(credential) def get_bigquery_access_token(self) -> Tuple[str, Optional[datetime]]: """Retrieves BigQuery access token information from the UserSecrets service. @@ -99,14 +122,25 @@ def get_bigquery_access_token(self) -> Tuple[str, Optional[datetime]]: """ return self._get_access_token(GcpTarget.BIGQUERY) + def _write_credentials_file(self, credentials) -> str: + adc_path = os.path.join(os.environ.get('HOME', '/'), 'gcloud_credential.json') + with open(adc_path, 'w') as f: + f.write(credentials) + os.environ['GOOGLE_APPLICATION_CREDENTIALS']=adc_path + + return adc_path + def _get_gcs_access_token(self) -> Tuple[str, Optional[datetime]]: return self._get_access_token(GcpTarget.GCS) + def _get_cloudai_access_token(self) -> Tuple[str, Optional[datetime]]: + return self._get_access_token(GcpTarget.CLOUDAI) + def _get_access_token(self, target: GcpTarget) -> Tuple[str, Optional[datetime]]: request_body = { 'Target': target.target } - response_json = self._make_post_request(request_body) + response_json = self.web_client.make_post_request(request_body, self.GET_USER_SECRET_ENDPOINT) if 'secret' not in response_json: raise BackendError( f'Unexpected response from the service. Response: {response_json}') diff --git a/patches/kaggle_session.py b/patches/kaggle_session.py new file mode 100644 index 00000000..30679c86 --- /dev/null +++ b/patches/kaggle_session.py @@ -0,0 +1,26 @@ +""" +This library adds support for retrieving data related to the current user session. +""" + +import os + +from kaggle_web_client import KaggleWebClient + + +class UserSessionClient(): + GET_SOURCE_ENDPOINT = '/requests/GetKernelRunSourceForCaipRequest' + + def __init__(self): + self.web_client = KaggleWebClient() + + def get_exportable_ipynb(self): + """Fetch the .ipynb source of the current notebook session. + + If Kaggle datasets are attached to the notebook, the source will + include an additional cell with logic to download the datasets + outside the Kaggle platform.
+ """ + request_body = { + 'UseDraft': True, + } + return self.web_client.make_post_request(request_body, self.GET_SOURCE_ENDPOINT) diff --git a/patches/kaggle_web_client.py b/patches/kaggle_web_client.py new file mode 100644 index 00000000..f7b7ae8b --- /dev/null +++ b/patches/kaggle_web_client.py @@ -0,0 +1,65 @@ +import json +import os +import socket +import urllib.request +from urllib.error import HTTPError, URLError + +_KAGGLE_DEFAULT_URL_BASE = "https://www.kaggle.com" +_KAGGLE_URL_BASE_ENV_VAR_NAME = "KAGGLE_URL_BASE" +_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME = "KAGGLE_USER_SECRETS_TOKEN" +_KAGGLE_IAP_TOKEN_ENV_VAR_NAME = "KAGGLE_IAP_TOKEN" +TIMEOUT_SECS = 40 + +class CredentialError(Exception): + pass + + +class BackendError(Exception): + pass + + +class KaggleWebClient: + + def __init__(self): + url_base_override = os.getenv(_KAGGLE_URL_BASE_ENV_VAR_NAME) + self.url_base = url_base_override or _KAGGLE_DEFAULT_URL_BASE + # Follow the OAuth 2.0 Authorization standard (https://tools.ietf.org/html/rfc6750) + self.jwt_token = os.getenv(_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME) + if self.jwt_token is None: + raise CredentialError( + 'A JWT Token is required to call Kaggle, ' + f'but none found in environment variable {_KAGGLE_USER_SECRETS_TOKEN_ENV_VAR_NAME}') + self.headers = { + 'Content-type': 'application/json', + 'X-Kaggle-Authorization': f'Bearer {self.jwt_token}', + } + iap_token = os.getenv(_KAGGLE_IAP_TOKEN_ENV_VAR_NAME) + if iap_token: + self.headers['Authorization'] = f'Bearer {iap_token}' + + def make_post_request(self, data: dict, endpoint: str, timeout: int = TIMEOUT_SECS) -> dict: + url = f'{self.url_base}{endpoint}' + request_body = dict(data) + req = urllib.request.Request(url, headers=self.headers, data=bytes( + json.dumps(request_body), encoding="utf-8")) + try: + with urllib.request.urlopen(req, timeout=timeout) as response: + response_json = json.loads(response.read()) + if not response_json.get('wasSuccessful') or 'result' not in response_json: + raise BackendError( + f'Unexpected response from the service. Response: {response_json}.') + return response_json['result'] + except (URLError, socket.timeout) as e: + if isinstance( + e, socket.timeout) or isinstance( + e.reason, socket.timeout): + raise ConnectionError( + 'Timeout error trying to communicate with service. Please ensure internet is on.') from e + raise ConnectionError( + 'Connection error trying to communicate with service.') from e + except HTTPError as e: + if e.code == 401 or e.code == 403: + raise CredentialError( + f'Service responded with error code {e.code}.' + ' Please ensure you have access to the resource.') from e + raise BackendError('Unexpected response from the service.') from e diff --git a/patches/keras_internal.py b/patches/keras_internal.py new file mode 100644 index 00000000..e28127f9 --- /dev/null +++ b/patches/keras_internal.py @@ -0,0 +1,24 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Access to Keras function with a different internal and external path.""" + +from tf_keras.src.engine import data_adapter as _data_adapter +from tf_keras.src.models import Functional +from tf_keras.layers import DenseFeatures +from tf_keras.src.utils.dataset_creator import DatasetCreator + + +unpack_x_y_sample_weight = _data_adapter.unpack_x_y_sample_weight +get_data_handler = _data_adapter.get_data_handler diff --git a/patches/keras_internal_test.py b/patches/keras_internal_test.py new file mode 100644 index 00000000..edc33ec2 --- /dev/null +++ b/patches/keras_internal_test.py @@ -0,0 +1,23 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tensorflow as tf +from tensorflow_decision_forests.keras import keras_internal + + +# Does nothing. Ensures keras_internal can be loaded. + +if __name__ == "__main__": + tf.test.main() + diff --git a/patches/log.py b/patches/log.py deleted file mode 100644 index 88040ba3..00000000 --- a/patches/log.py +++ /dev/null @@ -1,132 +0,0 @@ -import logging -import os - -import google.auth - - -_LOG_TO_FILE_ENV = os.getenv("KAGGLE_LOG_TO_FILE") - - -class _LogFormatter(logging.Formatter): - """A logging formatter which truncates long messages.""" - - _MAX_LOG_LENGTH = 10000 # Be generous, not to truncate long backtraces. - - def format(self, record): - msg = super(_LogFormatter, self).format(record) - return msg[:_LogFormatter._MAX_LOG_LENGTH] if msg else msg - -# TODO(vimota): Clean this up once we're using python 3.8 and can use -# (https://github.com/python/cpython/commit/dde9fdbe453925279ac3d2a6a72102f6f9ef247c) -# Right now, making the logging module display the intended frame's information -# when the logging calls (info, warn, ...) are wrapped (as is the case in our -# Log class) involves fragile logic. -class _Logger(logging.Logger): - - # This is a copy of logging.Logger.findCaller with the filename ignore - # set expanded to include the current filename (".../log.py"). - # Copyright 2001-2015 by Vinay Sajip. All Rights Reserved. - # License: https://github.com/python/cpython/blob/ce9e62544571e7ade7186697d5dd065fb4c5243f/LICENSE - def findCaller(self, stack_info=False): - f = logging.currentframe() - f = f.f_back - rv = "(unknown file)", 0, "(unknown function)", None - while hasattr(f, "f_code"): - co = f.f_code - filename = os.path.normcase(co.co_filename) - if filename in _ignore_srcfiles: - f = f.f_back - continue - sinfo = None - if stack_info: - sio = io.StringIO() - sio.write('Stack (most recent call last):\n') - traceback.print_stack(f, file=sio) - sinfo = sio.getvalue() - if sinfo[-1] == '\n': - sinfo = sinfo[:-1] - sio.close() - rv = (co.co_filename, f.f_lineno, co.co_name, sinfo) - break - return rv - - -_srcfile = os.path.normcase(_Logger.findCaller.__code__.co_filename) -_ignore_srcfiles = (_srcfile, logging._srcfile) - -class Log: - """ Helper aggregate for all things related to logging activity. """ - - _GLOBAL_LOG = logging.getLogger("") - _initialized = False - - # These are convenience helpers. 
For performance, consider saving Log.get_logger() and using that - @staticmethod - def critical(msg, *args, **kwargs): - Log._GLOBAL_LOG.critical(msg, *args, **kwargs) - - @staticmethod - def fatal(msg, *args, **kwargs): - Log._GLOBAL_LOG.fatal(msg, *args, **kwargs) - - @staticmethod - def exception(msg, *args, **kwargs): - Log._GLOBAL_LOG.exception(msg, *args, **kwargs) - - @staticmethod - def error(msg, *args, **kwargs): - Log._GLOBAL_LOG.error(msg, *args, **kwargs) - - @staticmethod - def warn(msg, *args, **kwargs): - Log._GLOBAL_LOG.warn(msg, *args, **kwargs) - - @staticmethod - def warning(msg, *args, **kwargs): - Log._GLOBAL_LOG.warning(msg, *args, **kwargs) - - @staticmethod - def debug(msg, *args, **kwargs): - Log._GLOBAL_LOG.debug(msg, *args, **kwargs) - - @staticmethod - def info(msg, *args, **kwargs): - Log._GLOBAL_LOG.info(msg, *args, **kwargs) - - @staticmethod - def set_level(loglevel): - if isinstance(loglevel, int): - Log._GLOBAL_LOG.setLevel(loglevel) - return - elif isinstance(loglevel, str): - # idea from https://docs.python.org/3.5/howto/logging.html#logging-to-a-file - numeric_level = getattr(logging, loglevel.upper(), None) - if isinstance(numeric_level, int): - Log._GLOBAL_LOG.setLevel(numeric_level) - return - - raise ValueError('Invalid log level: %s' % loglevel) - - @staticmethod - def _static_init(): - if Log._initialized: - return - - logging.setLoggerClass(_Logger) - # The root logger's type is unfortunately (and surprisingly) not affected by - # `setLoggerClass`. Monkey patch it instead. TODO(vimota): Remove this, see the TODO - # associated with _Logger. - logging.RootLogger.findCaller = _Logger.findCaller - log_to_file = _LOG_TO_FILE_ENV.lower() in ("yes", "true", "t", "1") if _LOG_TO_FILE_ENV is not None else True - if log_to_file: - handler = logging.FileHandler(filename='/tmp/kaggle.log', mode='w') - else: - handler = logging.StreamHandler() - - # ".1s" is for the first letter: http://stackoverflow.com/a/27453084/1869. - format_string = "%(asctime)s %(levelname).1s %(process)d %(filename)s:%(lineno)d] %(message)s" - handler.setFormatter(_LogFormatter(format_string)) - logging.basicConfig(level=logging.INFO, handlers=[handler]) - Log._initialized = True - -Log._static_init() \ No newline at end of file diff --git a/patches/nbconvert-extensions.tpl b/patches/nbconvert-extensions.tpl index b3de090d..e40f0013 100644 --- a/patches/nbconvert-extensions.tpl +++ b/patches/nbconvert-extensions.tpl @@ -4,7 +4,7 @@ All cell metadata starting with '_kg_' will be included with its value ({key}-{v as a class in the cell's DIV container #} -{% extends 'full.tpl'%} +{% extends 'classic/index.html.j2'%} {% block any_cell %}
{{ super() }} diff --git a/patches/sitecustomize.py b/patches/sitecustomize.py index f0dab6ca..1bb8a1b6 100644 --- a/patches/sitecustomize.py +++ b/patches/sitecustomize.py @@ -1,13 +1,27 @@ +import logging import os -from log import Log - import sys +import importlib.abc import importlib import importlib.machinery +import wrapt + class GcpModuleFinder(importlib.abc.MetaPathFinder): - _MODULES = ['google.cloud.bigquery', 'google.cloud.storage'] + _MODULES = [ + 'google.cloud.bigquery', + 'google.cloud.storage', + 'google.cloud.translate', + 'google.cloud.translate_v2', + 'google.cloud.translate_v3', + 'google.cloud.language', + 'google.cloud.language_v1', + 'google.cloud.videointelligence', + 'google.cloud.videointelligence_v1', + 'google.cloud.vision', + 'google.cloud.vision_v1', + ] _KAGGLE_GCP_PATH = 'kaggle_gcp.py' def __init__(self): pass @@ -39,7 +53,16 @@ def create_module(self, spec): import kaggle_gcp _LOADERS = { 'google.cloud.bigquery': kaggle_gcp.init_bigquery, - 'google.cloud.storage': kaggle_gcp.init_gcs + 'google.cloud.storage': kaggle_gcp.init_gcs, + 'google.cloud.translate': kaggle_gcp.init_translation_v3, + 'google.cloud.translate_v2': kaggle_gcp.init_translation_v2, + 'google.cloud.translate_v3': kaggle_gcp.init_translation_v3, + 'google.cloud.language': kaggle_gcp.init_natural_language, + 'google.cloud.language_v1': kaggle_gcp.init_natural_language, + 'google.cloud.videointelligence': kaggle_gcp.init_video_intelligence, + 'google.cloud.videointelligence_v1': kaggle_gcp.init_video_intelligence, + 'google.cloud.vision': kaggle_gcp.init_vision, + 'google.cloud.vision_v1': kaggle_gcp.init_vision } monkeypatch_gcp_module = _LOADERS[spec.name]() return monkeypatch_gcp_module @@ -47,6 +70,74 @@ def create_module(self, spec): def exec_module(self, module): pass - if not hasattr(sys, 'frozen'): sys.meta_path.insert(0, GcpModuleFinder()) + +@wrapt.when_imported('google.generativeai') +def post_import_logic(module): + if os.getenv('KAGGLE_DISABLE_GOOGLE_GENERATIVE_AI_INTEGRATION') != None: + return + if (os.getenv('KAGGLE_DATA_PROXY_TOKEN') == None or + os.getenv('KAGGLE_USER_SECRETS_TOKEN') == None or + (os.getenv('KAGGLE_DATA_PROXY_URL') == None and + os.getenv('KAGGLE_GRPC_DATA_PROXY_URL') == None)): + return + + old_configure = module.configure + + def new_configure(*args, **kwargs): + if ('default_metadata' in kwargs): + default_metadata = kwargs['default_metadata'] + else: + default_metadata = [] + default_metadata.append(("x-kaggle-proxy-data", os.environ['KAGGLE_DATA_PROXY_TOKEN'])) + user_secrets_token = os.environ['KAGGLE_USER_SECRETS_TOKEN'] + default_metadata.append(('x-kaggle-authorization', f'Bearer {user_secrets_token}')) + kwargs['default_metadata'] = default_metadata + + if ('client_options' in kwargs): + client_options = kwargs['client_options'] + else: + client_options = {} + + if os.getenv('KAGGLE_GOOGLE_GENERATIVE_AI_USE_REST_ONLY') != None: + kwargs['transport'] = 'rest' + + if 'transport' in kwargs and kwargs['transport'] == 'rest': + client_options['api_endpoint'] = os.environ['KAGGLE_DATA_PROXY_URL'] + client_options['api_endpoint'] += '/palmapi' + else: + client_options['api_endpoint'] = os.environ['KAGGLE_GRPC_DATA_PROXY_URL'] + kwargs['client_options'] = client_options + + old_configure(*args, **kwargs) + + module.configure = new_configure + module.configure() # generativeai can use GOOGLE_API_KEY env variable, so make sure we have the other configs set + +@wrapt.when_imported('google.genai') +def post_genai_import_logic(module): + if 
os.getenv('KAGGLE_DISABLE_GOOGLE_GENERATIVE_AI_INTEGRATION'): + return + + if not (os.getenv('KAGGLE_DATA_PROXY_TOKEN') and + os.getenv('KAGGLE_USER_SECRETS_TOKEN') and + os.getenv('KAGGLE_DATA_PROXY_URL')): + return + @wrapt.patch_function_wrapper(module, 'Client.__init__') + def init_wrapper(wrapped, instance, args, kwargs): + # Don't want to forward requests that are to Vertex AI, debug mode, or have their own http_options specified + # Thus, if the client constructor contains any params other than api_key, we don't set up forwarding + if any(value is not None for key, value in kwargs.items() if key != 'api_key'): + return wrapped(*args, **kwargs) + + default_metadata = { + "x-kaggle-proxy-data": os.environ['KAGGLE_DATA_PROXY_TOKEN'], + 'x-kaggle-authorization': f"Bearer {os.environ['KAGGLE_USER_SECRETS_TOKEN']}" + } + http_options = { + 'base_url': os.getenv('KAGGLE_DATA_PROXY_URL') + '/palmapi/', + 'headers': default_metadata + } + kwargs['http_options'] = http_options + return wrapped(*args, **kwargs) diff --git a/patches/template_conf.json b/patches/template_conf.json new file mode 100644 index 00000000..49cc88c2 --- /dev/null +++ b/patches/template_conf.json @@ -0,0 +1,13 @@ +{ + "base_template": "classic", + "mimetypes": { + "text/html": true + }, + "preprocessors": { + "100-pygments": { + "type": "nbconvert.preprocessors.CSSHTMLHeaderPreprocessor", + "enabled": true, + "style": "default" + } + } +} \ No newline at end of file diff --git a/push b/push index c3ebcaf4..124a3469 100755 --- a/push +++ b/push @@ -7,11 +7,14 @@ Usage: $0 [OPTIONS] [LABEL] Push a newly-built image with the given LABEL to gcr.io and DockerHub. Options: - -g, --gpu Push the image with GPU support. + -g, --gpu Push the image with GPU support. + -t, --tpu Push the image with TPU support. + -s, --source-image IMAGE Tag for the source image. EOF } -SOURCE_IMAGE='kaggle/python-build' +SOURCE_IMAGE_TAG='kaggle/python-build:latest' +SOURCE_IMAGE_TAG_OVERRIDE='' TARGET_IMAGE='gcr.io/kaggle-images/python' while :; do @@ -21,9 +24,22 @@ while :; do exit ;; -g|--gpu) - SOURCE_IMAGE='kaggle/python-gpu-build' + SOURCE_IMAGE_TAG='kaggle/python-gpu-build:latest' TARGET_IMAGE='gcr.io/kaggle-private-byod/python' ;; + -t|--tpu) + SOURCE_IMAGE_TAG='kaggle/python-tpuvm-build:latest' + TARGET_IMAGE='gcr.io/kaggle-private-byod/python-tpuvm' + ;; + -s|--source-image) + if [[ -z $2 ]]; then + usage + printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 + exit + fi + SOURCE_IMAGE_TAG_OVERRIDE=$2 + shift # skip the flag value + ;; -?*) usage printf 'ERROR: Unknown option: %s\n' "$1" >&2 @@ -38,16 +54,14 @@ done LABEL=${1:-testing} -readonly SOURCE_IMAGE +if [[ -n "$SOURCE_IMAGE_TAG_OVERRIDE" ]]; then + SOURCE_IMAGE_TAG="$SOURCE_IMAGE_TAG_OVERRIDE" +fi + +readonly SOURCE_IMAGE_TAG readonly TARGET_IMAGE readonly LABEL set -x -docker tag "${SOURCE_IMAGE}:latest" "${TARGET_IMAGE}:${LABEL}" +docker tag "${SOURCE_IMAGE_TAG}" "${TARGET_IMAGE}:${LABEL}" gcloud docker -- push "${TARGET_IMAGE}:${LABEL}" - -# Only CPU images are made public at this time. -if [[ "$LABEL" == "latest" && SOURCE_IMAGE = "kaggle/python-build" ]]; then - docker tag "${SOURCE_IMAGE}:latest" "kaggle/python:${LABEL}" - docker push "kaggle/python:${LABEL}" -fi diff --git a/renderizer/Dockerfile b/renderizer/Dockerfile new file mode 100644 index 00000000..9faac229 --- /dev/null +++ b/renderizer/Dockerfile @@ -0,0 +1,12 @@ +# Image used to generate the Dockerfiles from a Go text template.
+# +# Build: +# docker build --rm --pull -t gcr.io/kaggle-images/go-renderizer -f Dockerfile . +# +# Push: +# docker push gcr.io/kaggle-images/go-renderizer +FROM golang:1.17 + +RUN go install github.com/gomatic/renderizer/v2/cmd/renderizer@v2.0.13 + +ENTRYPOINT ["renderizer"] \ No newline at end of file diff --git a/tensorflow-whl/CHANGELOG.md b/tensorflow-whl/CHANGELOG.md deleted file mode 100644 index b1dd8589..00000000 --- a/tensorflow-whl/CHANGELOG.md +++ /dev/null @@ -1,7 +0,0 @@ -1.11.0-py36: TensorFlow 1.11.0 wheels built with python 3.6 -1.12.0-py36: TensorFlow 1.12.0 wheels with Cuda 9.2 -1.13.1-py36: TensorFlow 1.13.1 wheels with Cuda 10.0 -1.13.1-py36-2: TensorFlow 1.13.1 wheels with Cuda 10.0 and bump anaconda3 base image version to 5.3.0 -1.13.1-py37: TensorFlow 1.13.1 with Python 3.7.0 and bump anaconda3 base image version to 5.3.0 -1.13.1-py37-2: TensorFlow 1.13.1 with Python 3.7.3 -1.14.0-py36: TensorFlow 1.14.0 with Python 3.6 diff --git a/tensorflow-whl/Dockerfile b/tensorflow-whl/Dockerfile deleted file mode 100644 index 5e24c67c..00000000 --- a/tensorflow-whl/Dockerfile +++ /dev/null @@ -1,99 +0,0 @@ -FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04 AS nvidia -FROM continuumio/anaconda3:2019.03 - -# Avoid interactive configuration prompts/dialogs during apt-get. -ENV DEBIAN_FRONTEND=noninteractive - -# This is necessary to for apt to access HTTPS sources -RUN apt-get update && \ - apt-get install apt-transport-https - -# Cuda support -COPY --from=nvidia /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/ -COPY --from=nvidia /etc/apt/sources.list.d/nvidia-ml.list /etc/apt/sources.list.d/ -COPY --from=nvidia /etc/apt/trusted.gpg /etc/apt/trusted.gpg.d/cuda.gpg - -# Ensure the cuda libraries are compatible with the GPU image. -# TODO(b/120050292): Use templating to keep in sync. -ENV CUDA_VERSION=10.0.130 -ENV CUDA_PKG_VERSION=10-0=$CUDA_VERSION-1 -LABEL com.nvidia.volumes.needed="nvidia_driver" -LABEL com.nvidia.cuda.version="${CUDA_VERSION}" -ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -# The stub is useful to us both for built-time linking and run-time linking, on CPU-only systems. -# When intended to be used with actual GPUs, make sure to (besides providing access to the host -# CUDA user libraries, either manually or through the use of nvidia-docker) exclude them. One -# convenient way to do so is to obscure its contents by a bind mount: -# docker run .... -v /non-existing-directory:/usr/local/cuda/lib64/stubs:ro ... 
-ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs" -ENV NVIDIA_VISIBLE_DEVICES=all -ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility -ENV NVIDIA_REQUIRE_CUDA="cuda>=10.0" -RUN apt-get update && apt-get install -y --no-install-recommends \ - cuda-cupti-$CUDA_PKG_VERSION \ - cuda-cudart-$CUDA_PKG_VERSION \ - cuda-cudart-dev-$CUDA_PKG_VERSION \ - cuda-libraries-$CUDA_PKG_VERSION \ - cuda-libraries-dev-$CUDA_PKG_VERSION \ - cuda-nvml-dev-$CUDA_PKG_VERSION \ - cuda-minimal-build-$CUDA_PKG_VERSION \ - cuda-command-line-tools-$CUDA_PKG_VERSION \ - libcudnn7=7.5.0.56-1+cuda10.0 \ - libcudnn7-dev=7.5.0.56-1+cuda10.0 \ - libnccl2=2.4.2-1+cuda10.0 \ - libnccl-dev=2.4.2-1+cuda10.0 && \ - ln -s /usr/local/cuda-10.0 /usr/local/cuda && \ - ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 - -# Work to upgrade to Python 3.7 can be found on this branch: https://github.com/Kaggle/docker-python/blob/upgrade-py37/Dockerfile -RUN conda install -y python=3.6.6 && pip install --upgrade pip && pip install numpy - -# The tested build configurations for Tensorflow 1.14 uses Bazel 0.24.1: https://www.tensorflow.org/install/source#tested_build_configurations -ENV BAZEL_VERSION=0.24.1 -RUN apt-get install -y gnupg zip openjdk-8-jdk && \ - apt-get install -y --no-install-recommends \ - bash-completion \ - zlib1g-dev && \ - wget --no-verbose "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel_${BAZEL_VERSION}-linux-x86_64.deb" && \ - dpkg -i bazel_*.deb && \ - rm bazel_*.deb - -# Fetch tensorflow & install dependencies. -RUN cd /usr/local/src && \ - git clone https://github.com/tensorflow/tensorflow && \ - cd tensorflow && \ - git checkout r1.14 && \ - pip install keras_applications --no-deps && \ - pip install keras_preprocessing --no-deps - -# Create a tensorflow wheel for CPU -RUN cd /usr/local/src/tensorflow && \ - cat /dev/null | ./configure && \ - bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package && \ - bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_cpu && \ - bazel clean - -# Create a tensorflow wheel for GPU/cuda -ENV TF_NEED_CUDA=1 -ENV TF_CUDA_VERSION=10.0 -# 3.7 is for the K80 and 6.0 is for the P100, 7.5 is for the T4: https://developer.nvidia.com/cuda-gpus -ENV TF_CUDA_COMPUTE_CAPABILITIES=3.7,6.0,7.5 -ENV TF_CUDNN_VERSION=7 -ENV TF_NCCL_VERSION=2 -ENV NCCL_INSTALL_PATH=/usr/ - -RUN cd /usr/local/src/tensorflow && \ - # TF_NCCL_INSTALL_PATH is used for both libnccl.so.2 and libnccl.h. Make sure they are both accessible from the same directory. 
- ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/ && \ - cat /dev/null | ./configure && \ - echo "/usr/local/cuda-${TF_CUDA_VERSION}/targets/x86_64-linux/lib/stubs" > /etc/ld.so.conf.d/cuda-stubs.conf && ldconfig && \ - bazel build --config=opt \ - --config=cuda \ - --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ - //tensorflow/tools/pip_package:build_pip_package && \ - rm /etc/ld.so.conf.d/cuda-stubs.conf && ldconfig && \ - bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_gpu && \ - bazel clean - -# Print out the built .whl files -RUN ls -R /tmp/tensorflow* diff --git a/tensorflow-whl/Jenkinsfile b/tensorflow-whl/Jenkinsfile deleted file mode 100644 index 9a9cbd60..00000000 --- a/tensorflow-whl/Jenkinsfile +++ /dev/null @@ -1,36 +0,0 @@ -pipeline { - agent { label 'ephemeral-linux-gpu' } - options { - // The Build GPU stage depends on the image from the Push CPU stage - disableConcurrentBuilds() - } - environment { - GIT_COMMIT_SHORT = sh(returnStdout: true, script:"git rev-parse --short=7 HEAD").trim() - GIT_COMMIT_SUBJECT = sh(returnStdout: true, script:"git log --format=%s -n 1 HEAD").trim() - GIT_COMMIT_AUTHOR = sh(returnStdout: true, script:"git log --format='%an' -n 1 HEAD").trim() - GIT_COMMIT_SUMMARY = "`` ${GIT_COMMIT_SUBJECT} - ${GIT_COMMIT_AUTHOR}" - } - - stages { - stage('Build') { - steps { - sh '''#!/bin/bash - set -exo pipefail - - cd tensorflow-whl/ - ./build | ts - ''' - } - } - stage('Push') { - steps { - sh '''#!/bin/bash - set -exo pipefail - - cd tensorflow-whl/ - ./push ${GIT_BRANCH}-staging - ''' - } - } - } -} diff --git a/tensorflow-whl/README.md b/tensorflow-whl/README.md deleted file mode 100644 index 02c74d14..00000000 --- a/tensorflow-whl/README.md +++ /dev/null @@ -1,28 +0,0 @@ -# Build new Tensorflow wheels - -``` -./build -``` - -# Push the new wheels (Kaggle Engineers only) - -1. Add an entry in the [CHANGELOG](CHANGELOG.md) with an appropriate `LABEL`. -2. Push the new image using the `LABEL` you picked above. - - ``` - ./push LABEL - ``` - -# Use the new wheels - -Update the line below in the [CPU Dockerfile](../Dockerfile) and the [GPU Dockerfile](../gpu.Dockerfile) to use the new `LABEL`. - -To use wheels built locally: -``` -FROM kaggle/python-tensorflow-whl as tensorflow_whl -``` - -To use our pre-built wheels: -``` -FROM gcr.io/kaggle-images/python-tensorflow-whl: