diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl index 7561a700..9d661201 100644 --- a/Dockerfile.tmpl +++ b/Dockerfile.tmpl @@ -1,7 +1,8 @@ -ARG BASE_IMAGE \ - BASE_IMAGE_TAG - -FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} +{{ if eq .Accelerator "gpu" }} +FROM us-docker.pkg.dev/colab-images/public/runtime:release-colab-external_20260126-060048_RC00 +{{ else }} +FROM us-docker.pkg.dev/colab-images/public/cpu-runtime:release-colab-external_20260123-060023_RC00 +{{ end}} ADD kaggle_requirements.txt /kaggle_requirements.txt @@ -12,58 +13,32 @@ RUN pip freeze | grep -E 'tensorflow|keras|torch|jax' > /colab_requirements.txt RUN cat /colab_requirements.txt >> /requirements.txt RUN cat /kaggle_requirements.txt >> /requirements.txt -# TODO: GPU requirements.txt -# TODO: merge them better (override matching ones). - # Install Kaggle packages -RUN uv pip install --system -r /requirements.txt +RUN uv pip install --system --no-cache -r /requirements.txt # Install manual packages: # b/183041606#comment5: the Kaggle data proxy doesn't support these APIs. If the library is missing, it falls back to using a regular BigQuery query to fetch data. -RUN uv pip uninstall --system google-cloud-bigquery-storage - -# b/394382016: sigstore (dependency of kagglehub) requires a prerelease packages, installing separate. -# b/408284143: google-cloud-automl 2.0.0 introduced incompatible API changes, need to pin to 1.0.1, -# installed outside of kaggle_requirements.txt due to requiring an incompatibile version of protobuf. -RUN uv pip install --system --force-reinstall --prerelease=allow "kagglehub[pandas-datasets,hf-datasets,signing]>=0.3.12" \ - "google-cloud-automl==1.0.1" +RUN uv pip uninstall --system --no-cache google-cloud-bigquery-storage # uv cannot install this in requirements.txt without --no-build-isolation # to avoid affecting the larger build, we'll post-install it. -RUN uv pip install --no-build-isolation --system "git+https://github.com/Kaggle/learntools" - -# b/408281617: Torch is adamant that it can not install cudnn 9.3.x, only 9.1.x, but Tensorflow can only support 9.3.x. -# This conflict causes a number of package downgrades, which are handled in this command -RUN uv pip install \ - --index-url https://pypi.nvidia.com --extra-index-url https://pypi.org/simple/ --index-strategy unsafe-first-match \ - --system --force-reinstall "cuml-cu12==25.2.1" \ - "nvidia-cudnn-cu12==9.3.0.75" "nvidia-cublas-cu12==12.5.3.2" "nvidia-cusolver-cu12==11.6.3.83" \ - "nvidia-cuda-cupti-cu12==12.5.82" "nvidia-cuda-nvrtc-cu12==12.5.82" "nvidia-cuda-runtime-cu12==12.5.82" \ - "nvidia-cufft-cu12==11.2.3.61" "nvidia-curand-cu12==10.3.6.82" "nvidia-cusparse-cu12==12.5.1.3" \ - "nvidia-nvjitlink-cu12==12.5.82" -RUN uv pip install --system --force-reinstall "pynvjitlink-cu12==0.5.2" - -# b/385145217 Latest Colab lacks mkl numpy, install it. -RUN uv pip install --system --force-reinstall -i https://pypi.anaconda.org/intel/simple numpy - -# newer daal4py requires tbb>=2022, but libpysal is downgrading it for some reason -RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2" +RUN uv pip install --no-build-isolation --no-cache --system "git+https://github.com/Kaggle/learntools" -# b/404590350: Ray and torchtune have conflicting tune cli, we will prioritize torchtune. -# b/415358158: Gensim removed from Colab image to upgrade scipy -RUN uv pip install --system --force-reinstall --no-deps torchtune gensim "scipy<=1.15.3" +# b/404590350: Ray and torchtune have conflicting cli named `tune`. `ray` is not part of Colab's base image. 
Re-install `tune` to ensure the torchtune CLI is available by default. +# b/468367647: Unpin protobuf, version greater than v5.29.5 causes issues with numerous packages +RUN uv pip install --system --force-reinstall --no-cache --no-deps torchtune +RUN uv pip install --system --force-reinstall --no-cache "protobuf==5.29.5" # Adding non-package dependencies: ADD clean-layer.sh /tmp/clean-layer.sh ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl ADD patches/template_conf.json /opt/kaggle/conf.json -# /opt/conda/lib/python3.11/site-packages -ARG PACKAGE_PATH=/usr/local/lib/python3.11/dist-packages +ARG PACKAGE_PATH=/usr/local/lib/python3.12/dist-packages # Install GPU-specific non-pip packages. {{ if eq .Accelerator "gpu" }} -RUN uv pip install --system "pycuda" +RUN uv pip install --system --no-cache "pycuda" {{ end }} @@ -84,12 +59,9 @@ ADD patches/keras_internal.py \ $PACKAGE_PATH/tensorflow_decision_forests/keras/ RUN apt-get install -y libfreetype6-dev && \ - apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing + apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing && \ + /tmp/clean-layer.sh -# NLTK Project datasets -# b/408298750: We currently reinstall the package, because we get the following error: -# `AttributeError: module 'inspect' has no attribute 'formatargspec'. Did you mean: 'formatargvalues'?` -RUN uv pip install --system --force-reinstall "nltk>=3.9.1" RUN mkdir -p /usr/share/nltk_data && \ # NLTK Downloader no longer continues smoothly after an error, so we explicitly list # the corpuses that work @@ -130,7 +102,8 @@ RUN mkdir -p /root/.EasyOCR/model && \ /tmp/clean-layer.sh # Tesseract and some associated utility packages -RUN apt-get install tesseract-ocr -y +RUN apt-get install tesseract-ocr -y && \ + /tmp/clean-layer.sh ENV TESSERACT_PATH=/usr/bin/tesseract \ # For Facets, we also include an empty path to include $PWD. @@ -146,19 +119,10 @@ RUN mkdir -p /root/.jupyter && touch /root/.jupyter/jupyter_nbconvert_config.py mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/ipython_config.py && \ /tmp/clean-layer.sh -# Fix to import bq_helper library without downgrading setuptools and upgrading protobuf -RUN mkdir -p ~/src && git clone https://github.com/SohierDane/BigQuery_Helper ~/src/BigQuery_Helper && \ - mkdir -p ~/src/BigQuery_Helper/bq_helper && \ - mv ~/src/BigQuery_Helper/bq_helper.py ~/src/BigQuery_Helper/bq_helper/__init__.py && \ - mv ~/src/BigQuery_Helper/test_helper.py ~/src/BigQuery_Helper/bq_helper/ && \ - sed -i 's/)/packages=["bq_helper"])/g' ~/src/BigQuery_Helper/setup.py && \ - uv pip install --system -e ~/src/BigQuery_Helper "protobuf<3.21"&& \ - /tmp/clean-layer.sh - - # install imagemagick for wand # https://docs.wand-py.org/en/latest/guide/install.html#install-imagemagick-on-debian-ubuntu -RUN apt-get install libmagickwand-dev +RUN apt-get install libmagickwand-dev && \ + /tmp/clean-layer.sh # Override default imagemagick policies ADD patches/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml @@ -175,12 +139,11 @@ ADD patches/kaggle_gcp.py \ patches/kaggle_session.py \ patches/kaggle_web_client.py \ patches/kaggle_datasets.py \ - patches/log.py \ $PACKAGE_PATH/ # Figure out why this is in a different place? # Found by doing a export PYTHONVERBOSE=1 and then running python and checking for where it looked for it. 
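One quick way to reproduce the PYTHONVERBOSE check described in the comment above is a minimal sketch, assuming it is run inside the built image (the expected path comes from the ADD just below):

```bash
# Python imports sitecustomize automatically at interpreter startup, so its
# __file__ should resolve to the copy installed by the ADD below.
python -c "import sitecustomize; print(sitecustomize.__file__)"
# expected: /usr/lib/python3.12/sitecustomize.py
```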
-ADD patches/sitecustomize.py /usr/lib/python3.11/sitecustomize.py +ADD patches/sitecustomize.py /usr/lib/python3.12/sitecustomize.py ARG GIT_COMMIT=unknown \ BUILD_DATE=unknown diff --git a/Jenkinsfile b/Jenkinsfile index 906e0464..c4af03e6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -187,13 +187,13 @@ pipeline { post { failure { - mattermostSend color: 'danger', message: "*<${env.BUILD_URL}console|${JOB_NAME} failed>* ${GIT_COMMIT_SUMMARY} @kernels-backend-ops", channel: env.MATTERMOST_CHANNEL + mattermostSend color: 'danger', message: "*<${env.BUILD_URL}console|${JOB_NAME} failed>* ${GIT_COMMIT_SUMMARY} @dockerops", channel: env.MATTERMOST_CHANNEL } success { - mattermostSend color: 'good', message: "*<${env.BUILD_URL}console|${JOB_NAME} passed>* ${GIT_COMMIT_SUMMARY} @kernels-backend-ops", channel: env.MATTERMOST_CHANNEL + mattermostSend color: 'good', message: "*<${env.BUILD_URL}console|${JOB_NAME} passed>* ${GIT_COMMIT_SUMMARY} @dockerops", channel: env.MATTERMOST_CHANNEL } aborted { - mattermostSend color: 'warning', message: "*<${env.BUILD_URL}console|${JOB_NAME} aborted>* ${GIT_COMMIT_SUMMARY} @kernels-backend-ops", channel: env.MATTERMOST_CHANNEL + mattermostSend color: 'warning', message: "*<${env.BUILD_URL}console|${JOB_NAME} aborted>* ${GIT_COMMIT_SUMMARY} @dockerops", channel: env.MATTERMOST_CHANNEL } } } diff --git a/build b/build index 9b20f2dc..83bbe577 100755 --- a/build +++ b/build @@ -47,18 +47,13 @@ done BUILD_ARGS+=" --build-arg GIT_COMMIT=$(git rev-parse HEAD)" BUILD_ARGS+=" --build-arg BUILD_DATE=$(date '+%Y%m%d-%H%M%S')" -# Read build args from config.txt file. -SRCDIR=$(dirname "${BASH_SOURCE[0]}") -for l in `cat ${SRCDIR}/config.txt`; do - BUILD_ARGS+=" --build-arg $l" -done - readonly CACHE_FLAG readonly DOCKERFILE readonly ACCELERATOR readonly IMAGE_TAG readonly BUILD_ARGS +SRCDIR=$(dirname "${BASH_SOURCE[0]}") DOCKERFILE_OUTDIR="${SRCDIR}/.generated" mkdir -p $DOCKERFILE_OUTDIR DOCKERFILE_PATH="$DOCKERFILE_OUTDIR/$DOCKERFILE" diff --git a/clean-layer.sh b/clean-layer.sh index 467e1cac..9a50e7bf 100755 --- a/clean-layer.sh +++ b/clean-layer.sh @@ -10,8 +10,6 @@ set -e set -x -# Delete files that pip caches when installing a package. 
-rm -rf /root/.cache/pip/*
 # Delete old downloaded archive files
 apt-get autoremove -y
 # Delete downloaded archive files
diff --git a/config.txt b/config.txt
deleted file mode 100644
index af541652..00000000
--- a/config.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-BASE_IMAGE=us-docker.pkg.dev/colab-images/public/runtime
-BASE_IMAGE_TAG=release-colab_20250725-060057_RC00
-CUDA_MAJOR_VERSION=12
-CUDA_MINOR_VERSION=5
diff --git a/diff b/diff
index c0eb2e18..c8251703 100755
--- a/diff
+++ b/diff
@@ -104,7 +104,7 @@ fi
 for cmd in "${CMDS[@]}"; do
   echo "== Comparing $cmd =="
   diff --suppress-common-lines --side-by-side \
-    <(docker run -v $PWD/tools:/tools --rm "$BASE_IMAGE_TAG" /bin/bash -c "$cmd") \
-    <(docker run -v $PWD/tools:/tools --rm "$TARGET_IMAGE_TAG" /bin/bash -c "$cmd") \
+    <(docker run -v $PWD/tools:/tools --entrypoint bash --rm "$BASE_IMAGE_TAG" -c "$cmd") \
+    <(docker run -v $PWD/tools:/tools --entrypoint bash --rm "$TARGET_IMAGE_TAG" -c "$cmd") \
     && echo 'No diff' || true
 done
diff --git a/kaggle_requirements.txt b/kaggle_requirements.txt
index c711869e..30e0683f 100644
--- a/kaggle_requirements.txt
+++ b/kaggle_requirements.txt
@@ -7,14 +7,10 @@ PyArabic
 PyUpSet
 Pympler
 Rtree
-shapely<2
+shapely
 SimpleITK
-# b/302136621: Fix eli5 import for learntools, newer version require scikit-learn > 1.3
-TPOT==0.12.1
-Theano
+TPOT
 Wand
-annoy
-arrow
 bayesian-optimization
 boto3
 catboost
@@ -29,27 +25,17 @@ deap
 dipy
 docker
 easyocr
-# b/302136621: Fix eli5 import for learntools
-eli5
 emoji
-fastcore
-# b/445960030: Requires a newer version of fastai than the currently used base image.
-# Remove when relying on a newer base image.
-fastai>=2.8.4
 fasttext
 featuretools
 fiona
 fury
 fuzzywuzzy
 geojson
-# geopandas > v0.14.4 breaks learn tools
-geopandas==v0.14.4
 gensim
-# b/443054743
-google-adk
+# b/443054743,b/455550872
+google-adk[a2a,eval]>=1.21.0
 google-cloud-aiplatform
-# b/315753846: Unpin translate package.
-google-cloud-translate==3.12.1
 google-cloud-videointelligence
 google-cloud-vision
 google-genai
@@ -62,27 +48,28 @@ ipympl
 ipywidgets==8.1.5
 isoweek
 jedi
+# jiter 0.11.1 breaks simulation image
+jiter==0.10.0
 # b/276358430: fix Jupyter lsp freezing up the jupyter server
 jupyter-lsp==1.5.1
 # b/333854354: pin jupyter-server to version 2.12.5; later versions break LSP (b/333854354)
 jupyter_server==2.12.5
+jupyter_server_proxy
 jupyterlab
 jupyterlab-lsp
-# b/409363708: Ensure we have the update version, we can consider removing it once
-# Colab base image is updated more frequently.
-kaggle>=1.7.4.2
+kaggle>=1.8.3
 kaggle-environments
+kagglehub[pandas-datasets,hf-datasets,signing]>=0.4.2
 keras-cv
 keras-nlp
 keras-tuner
 kornia
 langid
-# b/328788268: libpysal 4.10 seems to fail with "module 'shapely' has no attribute 'Geometry'. Did you mean: 'geometry'"
-libpysal<=4.9.2
+libpysal
 lime
 line_profiler
 mamba
-matplotlib<3.8
+matplotlib
 mlcrate
 mne
 mpld3
@@ -91,9 +78,7 @@ nbconvert==6.4.5
 nbdev
 nilearn
 olefile
-# b/445960030: Broken in 1.19.0. See https://github.com/onnx/onnx/issues/7249.
-# Fixed with https://github.com/onnx/onnx/pull/7254. Upgrade when version with fix is published.
-onnx==1.18.0 +onnx openslide-bin openslide-python optuna @@ -104,15 +89,12 @@ path path.py pdf2image plotly-express -preprocessing pudb pyLDAvis pycryptodome -pydegensac pydicom pyemd pyexcel-ods -pymc3 pymongo pypdf pytesseract @@ -124,33 +106,23 @@ qtconsole ray rgf-python s3fs -# b/302136621: Fix eli5 import for learntools -scikit-learn==1.2.2 +scikit-learn # Scikit-learn accelerated library for x86 scikit-learn-intelex>=2023.0.1 scikit-multilearn scikit-optimize scikit-plot scikit-surprise -# Also pinning seaborn for learntools -seaborn==0.12.2 git+https://github.com/facebookresearch/segment-anything.git -# b/329869023: shap 0.45.0 breaks learntools -shap==0.44.1 squarify -tensorflow-cloud tensorflow-io -tensorflow-text -tensorflow_decision_forests +# Must be compatible with torch version: https://github.com/meta-pytorch/torchcodec?tab=readme-ov-file#installing-torchcodec +torchcodec==0.9 torchinfo torchmetrics torchtune -transformers>=4.51.0 +transformers>=5.0.0 vtk wavio -# b/350573866: xgboost v2.1.0 breaks learntools -xgboost==2.0.3 xvfbwrapper ydata-profiling -# b/443054743: pinned as newer versions requires protobuf > 3.20.3 -ydf==0.9.0 diff --git a/packages/README.md b/packages/README.md deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/build_package b/packages/build_package deleted file mode 100755 index e0af53e2..00000000 --- a/packages/build_package +++ /dev/null @@ -1,148 +0,0 @@ -#!/bin/bash -set -e - -usage() { -cat << EOF -Usage: $0 [OPTIONS] -Build a new package ".whl". - -Options: - -p, --package PACKAGE Package to build (e.g. lightgbm). - -v, --version VERSION Package version to build. - -b, --base-image IMAGE Base image tag (e.g. m80). - -c, --use-cache Use layer cache when building a new image. - -f, --force-rebuild Rebuild the image regardless of whether it already exist on GCR. - -u, --push Push image to GCR. - --build-arg ARG=VALUE Build arguments to pass to the docker build command. 
-EOF -} - -PACKAGE='' -PACKAGE_VERSION='' -BASE_IMAGE='' -DOCKERFILE='' -CACHE_FLAG='--no-cache' -FORCE_REBUILD=false -PUSH_TO_GCR=false -BUILD_ARGS='' - -while :; do - case "$1" in - -h|--help) - usage - exit - ;; - -p|--package) - if [[ -z $2 ]]; then - usage - printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 - exit 1 - fi - PACKAGE=$2 - DOCKERFILE="${PACKAGE}.Dockerfile" - shift # skip the flag value - ;; - -v|--version) - if [[ -z $2 ]]; then - usage - printf 'ERROR: No VERSION specified after the %s flag.\n' "$1" >&2 - exit 1 - fi - PACKAGE_VERSION=$2 - shift # skip the flag value - ;; - -t|--base-image) - if [[ -z $2 ]]; then - usage - printf 'ERROR: No TAG specified after the %s flag.\n' "$1" >&2 - exit 1 - fi - BASE_IMAGE=$2 - shift # skip the flag value - ;; - -c|--use-cache) - CACHE_FLAG='' - ;; - -f|--force-rebuild) - FORCE_REBUILD=true - ;; - -u|--push) - PUSH_TO_GCR=true - ;; - --build-arg) - if [[ -z $2 ]]; then - usage - printf 'ERROR: No ARG=VALUE specified after the %s flag.\n' "$1" >&2 - exit 1 - fi - BUILD_ARGS+=" $1 $2" - shift # skip the flag value - ;; - -?*) - usage - printf 'ERROR: Unknown option: %s\n' "$1" >&2 - exit 1 - ;; - *) - break - esac - - shift -done - -readonly PACKAGE -readonly PACKAGE_VERSION -readonly BASE_IMAGE -readonly DOCKERFILE -readonly CACHE_FLAG -readonly FORCE_REBUILD - -SRCDIR=$(dirname "${BASH_SOURCE[0]}") -DOCKERFILE_PATH="$SRCDIR/$DOCKERFILE" - -if [[ -z "$PACKAGE_VERSION" ]]; then - printf 'ERROR: missing --version flag.\n' - exit 1 -fi - -if [[ -z "$BASE_IMAGE" ]]; then - printf 'ERROR: missing --base-image flag.\n' - exit 1 -fi - -if [[ -z "$DOCKERFILE" ]]; then - printf 'ERROR: missing --package flag.\n' - exit 1 -fi - -# Keep only `release-colab_20240920-060127_RC00` in `us-docker.pkg.dev/colab-images/public/runtime:release-colab_20240920-060127_RC00` -TAG=$(echo $BASE_IMAGE | cut -d ':' -f 2) -# Append the package version -TAG=$TAG-$PACKAGE_VERSION -# Add the gcr repo. -TAG=gcr.io/kaggle-images/python-$PACKAGE-whl:$TAG - -SHOULD_BUILD=true -if ! $FORCE_REBUILD; then - echo "Checking if $TAG exists..." - docker pull $TAG && SHOULD_BUILD=false -fi - -if $SHOULD_BUILD; then - echo "Building $TAG..." - docker build --rm --pull $BUILD_ARGS \ - $CACHE_FLAG \ - -t $TAG \ - -f "$DOCKERFILE_PATH" \ - --build-arg BASE_IMAGE=$BASE_IMAGE \ - --build-arg PACKAGE_VERSION=$PACKAGE_VERSION \ - $SRCDIR - - if $PUSH_TO_GCR; then - echo "Pushing $TAG to GCR..." - docker push $TAG - fi -else - echo "Skipping build. $TAG already exists." - echo "Use --force-rebuild if you want to build a new version anyway." -fi \ No newline at end of file diff --git a/packages/jaxlib.Dockerfile b/packages/jaxlib.Dockerfile deleted file mode 100644 index ed73991c..00000000 --- a/packages/jaxlib.Dockerfile +++ /dev/null @@ -1,41 +0,0 @@ -ARG BASE_IMAGE - -FROM ${BASE_IMAGE} AS builder - -ARG PACKAGE_VERSION -ARG CUDA_MAJOR_VERSION -ARG CUDA_MINOR_VERSION - -# Make sure we are on the right version of CUDA -RUN update-alternatives --set cuda /usr/local/cuda-$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION - -# Ensures shared libraries installed with conda can be found by the dynamic link loader. -# For PyTorch, we need specifically mkl. 
-ENV LIBRARY_PATH="$LIBRARY_PATH:/opt/conda/lib" -ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib" - -# Instructions: https://jax.readthedocs.io/en/latest/developer.html#building-jaxlib-from-source -RUN sudo ln -s /usr/bin/python3 /usr/bin/python - -RUN apt-get update && \ - apt-get install -y g++ python3 python3-dev - -RUN pip install numpy wheel build - -RUN cd /usr/local/src && \ - git clone https://github.com/google/jax && \ - cd jax && \ - git checkout jaxlib-v$PACKAGE_VERSION - -RUN cd /usr/local/src/jax && \ - python build/build.py --enable_cuda - -# Using multi-stage builds to ensure the output image is very small -# See: https://docs.docker.com/develop/develop-images/multistage-build/ -FROM alpine:latest - -RUN mkdir -p /tmp/whl/ -COPY --from=builder /usr/local/src/jax/dist/*.whl /tmp/whl - -# Print out the built .whl file. -RUN ls -lh /tmp/whl/ \ No newline at end of file diff --git a/packages/torch.Dockerfile b/packages/torch.Dockerfile deleted file mode 100644 index 68c1eff3..00000000 --- a/packages/torch.Dockerfile +++ /dev/null @@ -1,86 +0,0 @@ -ARG BASE_IMAGE - -FROM ${BASE_IMAGE} AS builder - -ARG PACKAGE_VERSION -ARG TORCHAUDIO_VERSION -ARG TORCHVISION_VERSION -ARG CUDA_MAJOR_VERSION -ARG CUDA_MINOR_VERSION - -# Make sure we are on the right version of CUDA -RUN update-alternatives --set cuda /usr/local/cuda-$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION - -# TORCHVISION_VERSION is mandatory -RUN test -n "$TORCHVISION_VERSION" - -# Use mamba to speed up conda installs -RUN conda install -c conda-forge mamba - -# Build instructions: https://github.com/pytorch/pytorch#from-source -RUN mamba install astunparse numpy ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses -RUN mamba install -c pytorch magma-cuda121 - -# By default, it uses the version from version.txt which includes the `a0` (alpha zero) suffix and part of the git hash. -# This causes dependency conflicts like these: https://paste.googleplex.com/4786486378496000 -ENV PYTORCH_BUILD_VERSION=$PACKAGE_VERSION -ENV PYTORCH_BUILD_NUMBER=1 - -# Ensures shared libraries installed with conda can be found by the dynamic link loader. -# For PyTorch, we need specifically mkl. -ENV LIBRARY_PATH="$LIBRARY_PATH:/opt/conda/lib" -ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib" -ENV TORCH_CUDA_ARCH_LIST="6.0;7.0+PTX;7.5+PTX" -ENV FORCE_CUDA=1 -RUN cd /usr/local/src && \ - git clone --recursive https://github.com/pytorch/pytorch && \ - cd pytorch && \ - git checkout tags/v$PACKAGE_VERSION && \ - git submodule sync && \ - git submodule update --init --recursive --jobs 1 && \ - python setup.py bdist_wheel - -# Install torch which is required before we can build other torch* packages. -RUN pip install /usr/local/src/pytorch/dist/*.whl - -# Build torchaudio -# Instructions: https://github.com/pytorch/audio#from-source -# See comment above for PYTORCH_BUILD_VERSION. 
-ENV BUILD_VERSION=$TORCHAUDIO_VERSION -RUN sudo apt-get update && \ - # ncurses.h is required for this install - sudo apt-get install libncurses-dev && \ - # Fixing the build: https://github.com/pytorch/audio/issues/666#issuecomment-635928685 - mamba install -c conda-forge ncurses && \ - cd /usr/local/src && \ - git clone https://github.com/pytorch/audio && \ - cd audio && \ - git checkout tags/v$TORCHAUDIO_VERSION && \ - git submodule sync && \ - git submodule update --init --recursive --jobs 1 -# https://github.com/pytorch/audio/issues/936#issuecomment-702990346 -RUN sed -i 's/set(envs/set(envs\n "LIBS=-ltinfo"/' /usr/local/src/audio/third_party/sox/CMakeLists.txt -RUN cd /usr/local/src/audio && python setup.py bdist_wheel - -# Build torchvision. -# Instructions: https://github.com/pytorch/vision/tree/main#installation -# See comment above for PYTORCH_BUILD_VERSION. -ENV CUDA_HOME=/usr/local/cuda -ENV BUILD_VERSION=$TORCHVISION_VERSION -RUN cd /usr/local/src && \ - git clone --recursive https://github.com/pytorch/vision && \ - cd vision && \ - git checkout tags/v$TORCHVISION_VERSION && \ - python setup.py bdist_wheel - -# Using multi-stage builds to ensure the output image is very small -# See: https://docs.docker.com/develop/develop-images/multistage-build/ -FROM alpine:latest - -RUN mkdir -p /tmp/whl/ -COPY --from=builder /usr/local/src/pytorch/dist/*.whl /tmp/whl -COPY --from=builder /usr/local/src/audio/dist/*.whl /tmp/whl -COPY --from=builder /usr/local/src/vision/dist/*.whl /tmp/whl - -# Print out the built .whl file. -RUN ls -lh /tmp/whl/ diff --git a/patches/kaggle_gcp.py b/patches/kaggle_gcp.py index 2c8b64cc..4cb98858 100644 --- a/patches/kaggle_gcp.py +++ b/patches/kaggle_gcp.py @@ -1,5 +1,6 @@ import os import inspect +import logging from google.auth import credentials, environment_vars from google.auth.exceptions import RefreshError from google.api_core.gapic_v1.client_info import ClientInfo @@ -8,8 +9,6 @@ from google.cloud.bigquery._http import Connection from kaggle_secrets import GcpTarget, UserSecretsClient -from log import Log - KAGGLE_GCP_CLIENT_USER_AGENT="kaggle-gcp-client/1.0" def get_integrations(): @@ -22,7 +21,7 @@ def get_integrations(): target = GcpTarget[integration.upper()] kernel_integrations.add_integration(target) except KeyError as e: - Log.error(f"Unknown integration target: {integration.upper()}") + logging.debug(f"Unknown integration target: {integration.upper()}") return kernel_integrations @@ -66,14 +65,14 @@ def refresh(self, request): elif self.target == GcpTarget.CLOUDAI: self.token, self.expiry = client._get_cloudai_access_token() except ConnectionError as e: - Log.error(f"Connection error trying to refresh access token: {e}") + logging.error(f"Connection error trying to refresh access token: {e}") print("There was a connection error trying to fetch the access token. 
" f"Please ensure internet is on in order to use the {self.target.service} Integration.") raise RefreshError('Unable to refresh access token due to connection error.') from e except Exception as e: - Log.error(f"Error trying to refresh access token: {e}") + logging.error(f"Error trying to refresh access token: {e}") if (not get_integrations().has_integration(self.target)): - Log.error(f"No {self.target.service} integration found.") + logging.error(f"No {self.target.service} integration found.") print( f"Please ensure you have selected a {self.target.service} account in the Notebook Add-ons menu.") raise RefreshError('Unable to refresh access token.') from e @@ -102,7 +101,7 @@ def api_request(self, *args, **kwargs): msg = ("Permission denied using Kaggle's public BigQuery integration. " "Did you mean to select a BigQuery account in the Notebook Add-ons menu?") print(msg) - Log.info(msg) + logging.info(msg) raise e @@ -156,23 +155,23 @@ def monkeypatch_bq(bq_client, *args, **kwargs): # Remove these two lines once this is resolved: # https://github.com/googleapis/google-cloud-python/issues/8108 if explicit_project_id: - Log.info(f"Explicit project set to {explicit_project_id}") + logging.info(f"Explicit project set to {explicit_project_id}") kwargs['project'] = explicit_project_id if explicit_project_id is None and specified_credentials is None and not has_bigquery: msg = "Using Kaggle's public dataset BigQuery integration." - Log.info(msg) + logging.info(msg) print(msg) return PublicBigqueryClient(*args, **kwargs) else: if specified_credentials is None: - Log.info("No credentials specified, using KaggleKernelCredentials.") + logging.info("No credentials specified, using KaggleKernelCredentials.") kwargs['credentials'] = KaggleKernelCredentials() if (not has_bigquery): - Log.info("No bigquery integration found, creating client anyways.") + logging.info("No bigquery integration found, creating client anyways.") print('Please ensure you have selected a BigQuery ' 'account in the Notebook Add-ons menu.') if explicit_project_id is None: - Log.info("No project specified while using the unmodified client.") + logging.info("No project specified while using the unmodified client.") print('Please ensure you specify a project id when creating the client' ' in order to use your BigQuery account.') kwargs['client_info'] = set_kaggle_user_agent(kwargs.get('client_info')) @@ -196,20 +195,20 @@ def monkeypatch_aiplatform_init(aiplatform_klass, kaggle_kernel_credentials): def patched_init(*args, **kwargs): specified_credentials = kwargs.get('credentials') if specified_credentials is None: - Log.info("No credentials specified, using KaggleKernelCredentials.") + logging.info("No credentials specified, using KaggleKernelCredentials.") kwargs['credentials'] = kaggle_kernel_credentials return aiplatform_init(*args, **kwargs) if (not has_been_monkeypatched(aiplatform_klass.init)): aiplatform_klass.init = patched_init - Log.info("aiplatform.init patched") + logging.info("aiplatform.init patched") def monkeypatch_client(client_klass, kaggle_kernel_credentials): client_init = client_klass.__init__ def patched_init(self, *args, **kwargs): specified_credentials = kwargs.get('credentials') if specified_credentials is None: - Log.info("No credentials specified, using KaggleKernelCredentials.") + logging.info("No credentials specified, using KaggleKernelCredentials.") # Some GCP services demand the billing and target project must be the same. 
# To avoid using default service account based credential as caller credential # user need to provide ClientOptions with quota_project_id: @@ -227,7 +226,7 @@ def patched_init(self, *args, **kwargs): if (not has_been_monkeypatched(client_klass.__init__)): client_klass.__init__ = patched_init - Log.info(f"Client patched: {client_klass}") + logging.info(f"Client patched: {client_klass}") def set_kaggle_user_agent(client_info: ClientInfo): # Add kaggle client user agent in order to attribute usage. @@ -253,37 +252,6 @@ def init_gcs(): KaggleKernelCredentials(target=GcpTarget.GCS)) return storage -def init_automl(): - from google.cloud import automl, automl_v1beta1 - if not is_user_secrets_token_set(): - return - - from kaggle_gcp import get_integrations - if not get_integrations().has_cloudai(): - return - - from kaggle_secrets import GcpTarget - from kaggle_gcp import KaggleKernelCredentials - kaggle_kernel_credentials = KaggleKernelCredentials(target=GcpTarget.CLOUDAI) - - # Patch the 2 GA clients: AutoMlClient and PreditionServiceClient - monkeypatch_client(automl.AutoMlClient, kaggle_kernel_credentials) - monkeypatch_client(automl.PredictionServiceClient, kaggle_kernel_credentials) - - # The AutoML client library exposes 3 different client classes (AutoMlClient, - # TablesClient, PredictionServiceClient), so patch each of them. - # The same KaggleKernelCredentials are passed to all of them. - # The GcsClient class is only used internally by TablesClient. - - # The beta version of the clients that are now GA are included here for now. - # They are deprecated and will be removed by 1 May 2020. - monkeypatch_client(automl_v1beta1.AutoMlClient, kaggle_kernel_credentials) - monkeypatch_client(automl_v1beta1.PredictionServiceClient, kaggle_kernel_credentials) - - # The TablesClient is still in beta, so this will not be deprecated until - # the TablesClient is GA. - monkeypatch_client(automl_v1beta1.TablesClient, kaggle_kernel_credentials) - def init_translation_v2(): from google.cloud import translate_v2 if not is_user_secrets_token_set(): @@ -379,7 +347,6 @@ def init_vision(): def init(): init_bigquery() init_gcs() - init_automl() init_translation_v2() init_translation_v3() init_natural_language() @@ -392,4 +359,4 @@ def init(): # google.cloud.* and kaggle_gcp. By calling init here, we guarantee # that regardless of the original import that caused google.cloud.* to be # loaded, the monkeypatching will be done. -init() +init() \ No newline at end of file diff --git a/patches/log.py b/patches/log.py deleted file mode 100644 index 59a07c8c..00000000 --- a/patches/log.py +++ /dev/null @@ -1,133 +0,0 @@ -import io -import logging -import os - -import google.auth - - -_LOG_TO_FILE_ENV = os.getenv("KAGGLE_LOG_TO_FILE") - - -class _LogFormatter(logging.Formatter): - """A logging formatter which truncates long messages.""" - - _MAX_LOG_LENGTH = 10000 # Be generous, not to truncate long backtraces. - - def format(self, record): - msg = super(_LogFormatter, self).format(record) - return msg[:_LogFormatter._MAX_LOG_LENGTH] if msg else msg - -# TODO(vimota): Clean this up once we're using python 3.8 and can use -# (https://github.com/python/cpython/commit/dde9fdbe453925279ac3d2a6a72102f6f9ef247c) -# Right now, making the logging module display the intended frame's information -# when the logging calls (info, warn, ...) are wrapped (as is the case in our -# Log class) involves fragile logic. 
-class _Logger(logging.Logger): - - # This is a copy of logging.Logger.findCaller with the filename ignore - # set expanded to include the current filename (".../log.py"). - # Copyright 2001-2015 by Vinay Sajip. All Rights Reserved. - # License: https://github.com/python/cpython/blob/ce9e62544571e7ade7186697d5dd065fb4c5243f/LICENSE - def findCaller(self, stack_info=False, stacklevel=1): - f = logging.currentframe() - f = f.f_back - rv = "(unknown file)", 0, "(unknown function)", None - while hasattr(f, "f_code"): - co = f.f_code - filename = os.path.normcase(co.co_filename) - if filename in _ignore_srcfiles: - f = f.f_back - continue - sinfo = None - if stack_info: - sio = io.StringIO() - sio.write('Stack (most recent call last):\n') - traceback.print_stack(f, file=sio) - sinfo = sio.getvalue() - if sinfo[-1] == '\n': - sinfo = sinfo[:-1] - sio.close() - rv = (co.co_filename, f.f_lineno, co.co_name, sinfo) - break - return rv - - -_srcfile = os.path.normcase(_Logger.findCaller.__code__.co_filename) -_ignore_srcfiles = (_srcfile, logging._srcfile) - -class Log: - """ Helper aggregate for all things related to logging activity. """ - - _GLOBAL_LOG = logging.getLogger("") - _initialized = False - - # These are convenience helpers. For performance, consider saving Log.get_logger() and using that - @staticmethod - def critical(msg, *args, **kwargs): - Log._GLOBAL_LOG.critical(msg, *args, **kwargs) - - @staticmethod - def fatal(msg, *args, **kwargs): - Log._GLOBAL_LOG.fatal(msg, *args, **kwargs) - - @staticmethod - def exception(msg, *args, **kwargs): - Log._GLOBAL_LOG.exception(msg, *args, **kwargs) - - @staticmethod - def error(msg, *args, **kwargs): - Log._GLOBAL_LOG.error(msg, *args, **kwargs) - - @staticmethod - def warn(msg, *args, **kwargs): - Log._GLOBAL_LOG.warn(msg, *args, **kwargs) - - @staticmethod - def warning(msg, *args, **kwargs): - Log._GLOBAL_LOG.warning(msg, *args, **kwargs) - - @staticmethod - def debug(msg, *args, **kwargs): - Log._GLOBAL_LOG.debug(msg, *args, **kwargs) - - @staticmethod - def info(msg, *args, **kwargs): - Log._GLOBAL_LOG.info(msg, *args, **kwargs) - - @staticmethod - def set_level(loglevel): - if isinstance(loglevel, int): - Log._GLOBAL_LOG.setLevel(loglevel) - return - elif isinstance(loglevel, str): - # idea from https://docs.python.org/3.5/howto/logging.html#logging-to-a-file - numeric_level = getattr(logging, loglevel.upper(), None) - if isinstance(numeric_level, int): - Log._GLOBAL_LOG.setLevel(numeric_level) - return - - raise ValueError('Invalid log level: %s' % loglevel) - - @staticmethod - def _static_init(): - if Log._initialized: - return - - logging.setLoggerClass(_Logger) - # The root logger's type is unfortunately (and surprisingly) not affected by - # `setLoggerClass`. Monkey patch it instead. TODO(vimota): Remove this, see the TODO - # associated with _Logger. - logging.RootLogger.findCaller = _Logger.findCaller - log_to_file = _LOG_TO_FILE_ENV.lower() in ("yes", "true", "t", "1") if _LOG_TO_FILE_ENV is not None else True - if log_to_file: - handler = logging.FileHandler(filename='/tmp/kaggle.log', mode='w') - else: - handler = logging.StreamHandler() - - # ".1s" is for the first letter: http://stackoverflow.com/a/27453084/1869. 
- format_string = "%(asctime)s %(levelname).1s %(process)d %(filename)s:%(lineno)d] %(message)s" - handler.setFormatter(_LogFormatter(format_string)) - logging.basicConfig(level=logging.INFO, handlers=[handler]) - Log._initialized = True - -Log._static_init() diff --git a/patches/sitecustomize.py b/patches/sitecustomize.py index e8afb361..1bb8a1b6 100644 --- a/patches/sitecustomize.py +++ b/patches/sitecustomize.py @@ -1,7 +1,6 @@ +import logging import os -from log import Log - import sys import importlib.abc import importlib @@ -13,7 +12,6 @@ class GcpModuleFinder(importlib.abc.MetaPathFinder): _MODULES = [ 'google.cloud.bigquery', 'google.cloud.storage', - 'google.cloud.automl_v1beta1', 'google.cloud.translate', 'google.cloud.translate_v2', 'google.cloud.translate_v3', @@ -56,7 +54,6 @@ def create_module(self, spec): _LOADERS = { 'google.cloud.bigquery': kaggle_gcp.init_bigquery, 'google.cloud.storage': kaggle_gcp.init_gcs, - 'google.cloud.automl_v1beta1': kaggle_gcp.init_automl, 'google.cloud.translate': kaggle_gcp.init_translation_v3, 'google.cloud.translate_v2': kaggle_gcp.init_translation_v2, 'google.cloud.translate_v3': kaggle_gcp.init_translation_v3, diff --git a/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/2/metadata.json b/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/2/metadata.json deleted file mode 100755 index e6beacde..00000000 --- a/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/2/metadata.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "keras_version": "3.0.1", - "keras_nlp_version": "0.7.0", - "parameter_count": 4385920, - "date_saved": "2023-12-27@02:02:24" -} \ No newline at end of file diff --git a/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/2/tokenizer.json b/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/2/tokenizer.json deleted file mode 100755 index 48d99632..00000000 --- a/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/2/tokenizer.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "module": "keras_nlp.src.models.bert.bert_tokenizer", - "class_name": "BertTokenizer", - "config": { - "name": "bert_tokenizer", - "trainable": true, - "dtype": "int32", - "vocabulary": null, - "sequence_length": null, - "lowercase": true, - "strip_accents": false, - "split": true, - "suffix_indicator": "##", - "oov_token": "[UNK]" - }, - "registered_name": "keras_nlp>BertTokenizer", - "assets": [ - "assets/tokenizer/vocabulary.txt" - ], - "weights": null -} \ No newline at end of file diff --git a/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/2/assets/tokenizer/vocabulary.txt b/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/assets/tokenizer/vocabulary.txt old mode 100755 new mode 100644 similarity index 100% rename from tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/2/assets/tokenizer/vocabulary.txt rename to tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/assets/tokenizer/vocabulary.txt diff --git a/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/2/config.json b/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/config.json similarity index 68% rename from tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/2/config.json rename to tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/config.json index 3afddd31..94aa0b65 100755 --- a/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/2/config.json +++ 
b/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/config.json @@ -1,5 +1,5 @@ { - "module": "keras_nlp.src.models.bert.bert_backbone", + "module": "keras_hub.src.models.bert.bert_backbone", "class_name": "BertBackbone", "config": { "name": "bert_backbone", @@ -13,7 +13,5 @@ "max_sequence_length": 512, "num_segments": 2 }, - "registered_name": "keras_nlp>BertBackbone", - "assets": [], - "weights": "model.weights.h5" + "registered_name": "keras_hub>BertBackbone" } \ No newline at end of file diff --git a/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/metadata.json b/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/metadata.json new file mode 100755 index 00000000..db25ecad --- /dev/null +++ b/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/metadata.json @@ -0,0 +1,10 @@ +{ + "keras_version": "3.7.0", + "keras_hub_version": "0.19.0", + "parameter_count": 4385920, + "date_saved": "2024-12-20@19:42:50", + "tasks": [ + "MaskedLM", + "TextClassifier" + ] +} \ No newline at end of file diff --git a/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/model.weights.h5 b/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/model.weights.h5 new file mode 100755 index 00000000..2951f93d Binary files /dev/null and b/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/model.weights.h5 differ diff --git a/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/tokenizer.json b/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/tokenizer.json new file mode 100755 index 00000000..d32697cc --- /dev/null +++ b/tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3/tokenizer.json @@ -0,0 +1,27 @@ +{ + "module": "keras_hub.src.models.bert.bert_tokenizer", + "class_name": "BertTokenizer", + "config": { + "name": "bert_tokenizer", + "trainable": true, + "dtype": { + "module": "keras", + "class_name": "DTypePolicy", + "config": { + "name": "int32" + }, + "registered_name": null + }, + "config_file": "tokenizer.json", + "vocabulary": null, + "sequence_length": null, + "lowercase": true, + "strip_accents": false, + "split": true, + "suffix_indicator": "##", + "oov_token": "[UNK]", + "special_tokens": null, + "special_tokens_in_strings": false + }, + "registered_name": "keras_hub>BertTokenizer" +} \ No newline at end of file diff --git a/tests/test_annoy.py b/tests/test_annoy.py deleted file mode 100644 index 93b7d0c2..00000000 --- a/tests/test_annoy.py +++ /dev/null @@ -1,11 +0,0 @@ -import unittest - -from annoy import AnnoyIndex - - -class TestAnnoy(unittest.TestCase): - def test_tree(self): - t = AnnoyIndex(5, 'angular') - t.add_item(1, [1,2,3,4,5]) - - self.assertTrue(t.build(1)) diff --git a/tests/test_automl.py b/tests/test_automl.py deleted file mode 100644 index 9a048b14..00000000 --- a/tests/test_automl.py +++ /dev/null @@ -1,139 +0,0 @@ -import unittest - -from unittest.mock import Mock, patch - -from kaggle_gcp import KaggleKernelCredentials, init_automl -from test.support.os_helper import EnvironmentVarGuard -from google.cloud import storage, automl_v1beta1, automl - -def _make_credentials(): - import google.auth.credentials - credentials = Mock(spec=google.auth.credentials.Credentials) - credentials.universe_domain = 'googleapis.com' - return credentials - -class TestAutoMl(unittest.TestCase): - - class FakeClient: - def __init__(self, credentials=None, client_info=None, **kwargs): - self.credentials = credentials - - class 
FakeConnection(): - def __init__(self, user_agent): - self.user_agent = user_agent - if (client_info is not None): - self._connection = FakeConnection(client_info.user_agent) - - @patch("google.cloud.automl.AutoMlClient", new=FakeClient) - def test_user_provided_credentials(self): - credentials = _make_credentials() - env = EnvironmentVarGuard() - env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar') - env.set('KAGGLE_KERNEL_INTEGRATIONS', 'CLOUDAI') - with env: - init_automl() - client = automl.AutoMlClient(credentials=credentials) - self.assertNotIsInstance(client.credentials, KaggleKernelCredentials) - self.assertIsNotNone(client.credentials) - - def test_tables_gcs_client(self): - # The GcsClient can't currently be monkeypatched for default - # credentials because it requires a project which can't be set. - # Verify that creating an automl_v1beta1.GcsClient given an actual - # storage.Client sets the client properly. - gcs_client = storage.Client(project="xyz", credentials=_make_credentials()) - tables_gcs_client = automl_v1beta1.GcsClient(client=gcs_client) - self.assertIs(tables_gcs_client.client, gcs_client) - - @patch("google.cloud.automl_v1beta1.gapic.auto_ml_client.AutoMlClient", new=FakeClient) - def test_tables_client_credentials(self): - credentials = _make_credentials() - env = EnvironmentVarGuard() - env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar') - env.set('KAGGLE_KERNEL_INTEGRATIONS', 'CLOUDAI') - with env: - init_automl() - tables_client = automl_v1beta1.TablesClient(credentials=credentials) - self.assertEqual(tables_client.auto_ml_client.credentials, credentials) - - @patch("google.cloud.automl.AutoMlClient", new=FakeClient) - def test_default_credentials_automl_client(self): - env = EnvironmentVarGuard() - env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar') - env.set('KAGGLE_KERNEL_INTEGRATIONS', 'CLOUDAI') - with env: - init_automl() - automl_client = automl.AutoMlClient() - self.assertIsNotNone(automl_client.credentials) - self.assertIsInstance(automl_client.credentials, KaggleKernelCredentials) - self.assertTrue(automl_client._connection.user_agent.startswith("kaggle-gcp-client/1.0")) - - @patch("google.cloud.automl_v1beta1.AutoMlClient", new=FakeClient) - def test_default_credentials_automl_v1beta1_client(self): - env = EnvironmentVarGuard() - env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar') - env.set('KAGGLE_KERNEL_INTEGRATIONS', 'CLOUDAI') - with env: - init_automl() - automl_client = automl_v1beta1.AutoMlClient() - self.assertIsNotNone(automl_client.credentials) - self.assertIsInstance(automl_client.credentials, KaggleKernelCredentials) - self.assertTrue(automl_client._connection.user_agent.startswith("kaggle-gcp-client/1.0")) - - @patch("google.cloud.automl_v1beta1.TablesClient", new=FakeClient) - def test_default_credentials_tables_client(self): - env = EnvironmentVarGuard() - env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar') - env.set('KAGGLE_KERNEL_INTEGRATIONS', 'CLOUDAI') - with env: - init_automl() - tables_client = automl_v1beta1.TablesClient() - self.assertIsNotNone(tables_client.credentials) - self.assertIsInstance(tables_client.credentials, KaggleKernelCredentials) - self.assertTrue(tables_client._connection.user_agent.startswith("kaggle-gcp-client/1.0")) - - @patch("google.cloud.automl.PredictionServiceClient", new=FakeClient) - def test_default_credentials_prediction_client(self): - env = EnvironmentVarGuard() - env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar') - env.set('KAGGLE_KERNEL_INTEGRATIONS', 'CLOUDAI') - with env: - prediction_client = 
automl.PredictionServiceClient() - self.assertIsNotNone(prediction_client.credentials) - self.assertIsInstance(prediction_client.credentials, KaggleKernelCredentials) - self.assertTrue(prediction_client._connection.user_agent.startswith("kaggle-gcp-client/1.0")) - - @patch("google.cloud.automl_v1beta1.PredictionServiceClient", new=FakeClient) - def test_default_credentials_prediction_v1beta1_client(self): - env = EnvironmentVarGuard() - env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar') - env.set('KAGGLE_KERNEL_INTEGRATIONS', 'CLOUDAI') - with env: - prediction_client = automl_v1beta1.PredictionServiceClient() - self.assertIsNotNone(prediction_client.credentials) - self.assertIsInstance(prediction_client.credentials, KaggleKernelCredentials) - self.assertTrue(prediction_client._connection.user_agent.startswith("kaggle-gcp-client/1.0")) - - def test_monkeypatching_idempotent(self): - env = EnvironmentVarGuard() - env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar') - env.set('KAGGLE_KERNEL_INTEGRATIONS', 'CLOUDAI') - with env: - client1 = automl.AutoMlClient.__init__ - init_automl() - client2 = automl.AutoMlClient.__init__ - self.assertEqual(client1, client2) - - @patch("google.cloud.automl_v1beta1.PredictionServiceClient", new=FakeClient) - def test_legacy_AUTOML_variable_v1beta1_client(self): - """ - Tests previous KAGGLE_KERNEL_INTEGRATIONS="AUTOML" environment setting - """ - env = EnvironmentVarGuard() - env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar') - env.set('KAGGLE_KERNEL_INTEGRATIONS', 'AUTOML') - with env: - prediction_client = automl_v1beta1.PredictionServiceClient() - self.assertIsNotNone(prediction_client.credentials) - self.assertIsInstance(prediction_client.credentials, KaggleKernelCredentials) - self.assertTrue(prediction_client._connection.user_agent.startswith("kaggle-gcp-client/1.0")) \ No newline at end of file diff --git a/tests/test_geopandas.py b/tests/test_geopandas.py deleted file mode 100644 index 4c0106b2..00000000 --- a/tests/test_geopandas.py +++ /dev/null @@ -1,16 +0,0 @@ -import unittest - -import geopandas - -class TestGeopandas(unittest.TestCase): - def test_read(self): - df = geopandas.read_file(geopandas.datasets.get_path('nybb')) - self.assertTrue(df.size > 1) - - def test_spatial_join(self): - cities = geopandas.read_file(geopandas.datasets.get_path('naturalearth_cities')) - world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres')) - countries = world[['geometry', 'name']] - countries = countries.rename(columns={'name':'country'}) - cities_with_country = geopandas.sjoin(cities, countries, how="inner", op='intersects') - self.assertTrue(cities_with_country.size > 1) diff --git a/tests/test_google_import_adk.py b/tests/test_google_import_adk.py new file mode 100644 index 00000000..9ae11314 --- /dev/null +++ b/tests/test_google_import_adk.py @@ -0,0 +1,31 @@ +import json +import unittest +import threading +from urllib.parse import urlparse + +class TestGoogleADK(unittest.TestCase): + + def define_agent(self): + from google.adk.agents import Agent + from google.adk.models.google_llm import Gemini + from google.adk.runners import InMemoryRunner + from google.adk.tools import google_search + from google.genai import types + + retry_config = types.HttpRetryOptions( + attempts=5, # Maximum retry attempts + exp_base=7, # Delay multiplier + initial_delay=1, # Initial delay before first retry (in seconds) + http_status_codes=[429, 500, 503, 504] # Retry on these HTTP errors + ) + + root_agent = Agent( + name="helpful_assistant", + model=Gemini( + 
model="gemini-2.0-flash-lite", + retry_options=retry_config + ), + description="A simple agent that can answer general questions.", + instruction="You are a helpful assistant. Use Google Search for current info or if unsure.", + tools=[google_search], + ) diff --git a/tests/test_imports.py b/tests/test_imports.py index b22ebe7a..6c429516 100644 --- a/tests/test_imports.py +++ b/tests/test_imports.py @@ -3,6 +3,5 @@ class TestImport(unittest.TestCase): # Basic import tests for packages without any. def test_basic(self): - import bq_helper import tensorflow_datasets import segment_anything diff --git a/tests/test_jiter.py b/tests/test_jiter.py new file mode 100644 index 00000000..6b31925e --- /dev/null +++ b/tests/test_jiter.py @@ -0,0 +1,9 @@ +import unittest + +from distutils.version import StrictVersion + +import jiter + +class TestJiter(unittest.TestCase): + def test_version(self): + self.assertEqual(StrictVersion(jiter.__version__), StrictVersion("0.10.0")) diff --git a/tests/test_matplotlib.py b/tests/test_matplotlib.py index c04f3f23..125ccda4 100644 --- a/tests/test_matplotlib.py +++ b/tests/test_matplotlib.py @@ -8,10 +8,6 @@ import numpy as np class TestMatplotlib(unittest.TestCase): - def test_version(self): - # b/308525631: newer versions of Matplotlib causes learntools to fail - self.assertLess(StrictVersion(matplotlib.__version__), StrictVersion("3.8.0")) - def test_plot(self): plt.plot(np.linspace(0,1,50), np.random.rand(50)) plt.savefig("plot1.png") diff --git a/tests/test_numpy.py b/tests/test_numpy.py index 948455ea..ab7ec03c 100644 --- a/tests/test_numpy.py +++ b/tests/test_numpy.py @@ -7,25 +7,7 @@ from contextlib import redirect_stdout class TestNumpy(unittest.TestCase): - def test_version(self): - # b/370860329: newer versions are not capable with current tensorflow - self.assertEqual(StrictVersion(np.__version__), StrictVersion("1.26.4")) - def test_array(self): array = np.array([1, 3]) self.assertEqual((2,), array.shape) - - # Numpy must be linked to the MKL. (Occasionally, a third-party package will muck up the installation - # and numpy will be reinstalled with an OpenBLAS backing.) - def test_mkl(self): - try: - from numpy.distutils.system_info import get_info - # This will throw an exception if the MKL is not linked correctly or return an empty dict. 
- self.assertTrue(get_info("blas_mkl")) - except: - # Fallback to check if mkl is present via show_config() - config_out = io.StringIO() - with redirect_stdout(config_out): - np.show_config() - self.assertIn("mkl_rt", config_out.getvalue()) diff --git a/tests/test_pydegensac.py b/tests/test_pydegensac.py deleted file mode 100644 index be72b53e..00000000 --- a/tests/test_pydegensac.py +++ /dev/null @@ -1,18 +0,0 @@ -import unittest - -import pydegensac -import numpy as np - - -class TestPydegensac(unittest.TestCase): - def test_find_homography(self): - src_pts = np.float32([ [0,0],[0,1],[1,1],[1,0] ]).reshape(-1,2) - dst_pts = np.float32([ [0,0],[0,-1],[-1,-1],[-1,0] ]).reshape(-1,2) - - H, mask = pydegensac.findHomography(src_pts, dst_pts, 4, 1) - - self.assertEqual(3, len(H)) - self.assertEqual(4, len(mask)) - - - diff --git a/tests/test_tensorflow_cloud.py b/tests/test_tensorflow_cloud.py deleted file mode 100644 index 2875e121..00000000 --- a/tests/test_tensorflow_cloud.py +++ /dev/null @@ -1,8 +0,0 @@ -import unittest - -import tensorflow_cloud as tfc - - -class TestTensorflowCloud(unittest.TestCase): - def test_remote(self): - self.assertFalse(tfc.remote()) diff --git a/tests/test_translation.py b/tests/test_translation.py index 5bb41b62..52de2a08 100644 --- a/tests/test_translation.py +++ b/tests/test_translation.py @@ -6,7 +6,7 @@ from kaggle_gcp import KaggleKernelCredentials, KaggleKernelWithProjetCredentials, init_translation_v2, init_translation_v3 from test.support.os_helper import EnvironmentVarGuard from google.api_core import client_options -from google.cloud import translate, translate_v2 +from google.cloud import translate_v3 as translate, translate_v2 def _make_credentials(): import google.auth.credentials @@ -48,7 +48,7 @@ def test_user_provided_credentials_v2(self): self.assertIsNotNone(client.credentials) self.assertNotIsInstance(client.credentials, KaggleKernelCredentials) - @patch("google.cloud.translate.TranslationServiceClient", new=FakeClient) + @patch("google.cloud.translate_v3.TranslationServiceClient", new=FakeClient) def test_default_credentials_v3(self): env = EnvironmentVarGuard() env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar') @@ -60,7 +60,7 @@ def test_default_credentials_v3(self): self.assertIsInstance(client.credentials, KaggleKernelCredentials) - @patch("google.cloud.translate.TranslationServiceClient", new=FakeClient) + @patch("google.cloud.translate_v3.TranslationServiceClient", new=FakeClient) def test_user_provided_credentials_v3(self): credentials = _make_credentials() env = EnvironmentVarGuard() @@ -107,13 +107,12 @@ def test_monkeypatching_idempotent(self): self.assertEqual(client2_1, client2_2) self.assertEqual(client3_1, client3_2) - @patch("google.cloud.translate.TranslationServiceClient", new=FakeClient) + @patch("google.cloud.translate_v3.TranslationServiceClient", new=FakeClient) def test_client_credential_uniqueness_v3(self): """ Client instance must use unique KaggleKernelWithProjetCredentials with quota_project_id when client_options.quota_project_id provided. 
(even if quota_project_id is same) """ - credentials = _make_credentials() env = EnvironmentVarGuard() env.set('KAGGLE_USER_SECRETS_TOKEN', 'foobar') env.set('KAGGLE_KERNEL_INTEGRATIONS', 'CLOUDAI') diff --git a/tests/test_user_secrets.py b/tests/test_user_secrets.py index 67c628f7..c11432fe 100644 --- a/tests/test_user_secrets.py +++ b/tests/test_user_secrets.py @@ -200,22 +200,6 @@ def call_get_cloudai_access_token(): client = UserSecretsClient() secret_response = client._get_cloudai_access_token() self.assertEqual(secret_response, (secret, now + timedelta(seconds=3600))) - def call_get_translation_access_token(): - client = UserSecretsClient() - secret_response = client._get_translation_access_token() - self.assertEqual(secret_response, (secret, now + timedelta(seconds=3600))) - def call_get_natural_lang_access_token(): - client = UserSecretsClient() - secret_response = client._get_natural_language_access_token() - self.assertEqual(secret_response, (secret, now + timedelta(seconds=3600))) - def call_get_video_intell_access_token(): - client = UserSecretsClient() - secret_response = client._get_video_intelligence_access_token() - self.assertEqual(secret_response, (secret, now + timedelta(seconds=3600))) - def call_get_vision_access_token(): - client = UserSecretsClient() - secret_response = client._get_vision_access_token() - self.assertEqual(secret_response, (secret, now + timedelta(seconds=3600))) self._test_client(call_get_bigquery_access_token, '/requests/GetUserSecretRequest', {'Target': GcpTarget.BIGQUERY.target}, diff --git a/tests/test_xgboost.py b/tests/test_xgboost.py index 618a63cc..68166813 100644 --- a/tests/test_xgboost.py +++ b/tests/test_xgboost.py @@ -17,10 +17,9 @@ def test_classifier(self): X_test = np.random.random((100, 28)) y_test = np.random.randint(10, size=(100, 1)) - xgb1 = XGBClassifier(n_estimators=3, use_label_encoder=False) + xgb1 = XGBClassifier(n_estimators=3, use_label_encoder=False, eval_metric='mlogloss') xgb1.fit( X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], - eval_metric='mlogloss' ) self.assertIn("validation_0", xgb1.evals_result()) diff --git a/tests/utils/kagglehub.py b/tests/utils/kagglehub.py index d7819dde..7a2a8995 100644 --- a/tests/utils/kagglehub.py +++ b/tests/utils/kagglehub.py @@ -1,3 +1,4 @@ +import json import os import threading import re @@ -7,6 +8,8 @@ from test.support.os_helper import EnvironmentVarGuard from http.server import BaseHTTPRequestHandler, HTTPServer +from kagglesdk.kaggle_env import get_endpoint, get_env + class KaggleAPIHandler(BaseHTTPRequestHandler): """ Fake Kaggle API server supporting the download endpoint. 
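The hunks below switch the fake Kaggle API server from a GET route (model handle in the URL path) to kagglehub's newer POST-based model download RPC. A sketch of the request shape the updated handler serves, with placeholder values (the real test address is resolved by kagglesdk under KAGGLE_API_ENVIRONMENT=TEST; localhost:7777 is only illustrative):

```bash
# Field names mirror the request_body keys parsed in do_POST below; the
# handle maps to tests/data/kagglehub/models/keras/bert/keras/bert_tiny_en_uncased/3.
curl -s -X POST \
  -H 'Content-Type: application/json' \
  -d '{"ownerSlug": "keras", "modelSlug": "bert", "instanceSlug": "bert_tiny_en_uncased", "versionNumber": 3, "path": "config.json"}' \
  'http://localhost:7777/api/v1/models.ModelApiService/DownloadModelInstanceVersion'
```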
@@ -15,15 +18,18 @@ class KaggleAPIHandler(BaseHTTPRequestHandler): def do_HEAD(self): self.send_response(200) - def do_GET(self): - m = re.match("^/api/v1/models/(.+)/download/(.+)$", self.path) - if not m: + def do_POST(self): + content_length = int(self.headers.get('Content-Length', 0)) + body_bytes = self.rfile.read(content_length) + request_body = json.loads(body_bytes.decode('utf-8')) + + if self.path != "/api/v1/models.ModelApiService/DownloadModelInstanceVersion": self.send_response(404) self.wfile.write(bytes(f"Unhandled path: {self.path}", "utf-8")) return - model_handle = m.group(1) - path = m.group(2) + model_handle = f"{request_body["ownerSlug"]}/{request_body["modelSlug"]}/keras/{request_body["instanceSlug"]}/{request_body["versionNumber"]}" + path = request_body["path"] filepath = f"/input/tests/data/kagglehub/models/{model_handle}/{path}" if not os.path.isfile(filepath): self.send_error(404, "Internet is disabled in our tests " @@ -41,14 +47,12 @@ def do_GET(self): @contextmanager def create_test_kagglehub_server(): - endpoint = 'http://localhost:7777' env = EnvironmentVarGuard() - env.set('KAGGLE_API_ENDPOINT', endpoint) - test_server_address = urlparse(endpoint) + env.set('KAGGLE_API_ENVIRONMENT', 'TEST') with env: - if not test_server_address.hostname or not test_server_address.port: - msg = f"Invalid test server address: {endpoint}. You must specify a hostname & port" - raise ValueError(msg) + endpoint = get_endpoint(get_env()) + test_server_address = urlparse(endpoint) + with HTTPServer((test_server_address.hostname, test_server_address.port), KaggleAPIHandler) as httpd: threading.Thread(target=httpd.serve_forever).start() diff --git a/tpu/Dockerfile b/tpu/Dockerfile index fd0c0684..343443ae 100644 --- a/tpu/Dockerfile +++ b/tpu/Dockerfile @@ -34,20 +34,39 @@ RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y # Additional useful packages should be added in the requirements.txt # Bring in the requirements.txt and replace variables in it: RUN apt-get install -y gettext -ADD tpu/requirements.txt /kaggle_requirements.txt -RUN envsubst < /kaggle_requirements.txt > /requirements.txt +ADD tpu/requirements.in /kaggle_requirements.in +RUN envsubst < /kaggle_requirements.in > /requirements.in # Install uv and then install the requirements: RUN curl -LsSf https://astral.sh/uv/install.sh | sh -RUN export PATH="${HOME}/.local/bin:${PATH}" && uv pip install --system -r /requirements.txt --prerelease=allow --force-reinstall && \ +RUN export PATH="${HOME}/.local/bin:${PATH}" && \ + uv pip compile --system --prerelease=allow \ + --verbose \ + --upgrade \ + --find-links=https://storage.googleapis.com/jax-releases/libtpu_releases.html \ + --find-links=https://storage.googleapis.com/libtpu-releases/index.html \ + --find-links=https://storage.googleapis.com/libtpu-wheels/index.html \ + --find-links=https://download.pytorch.org/whl/torch_stable.html \ + --emit-find-links \ + --no-emit-package pip \ + --no-emit-package setuptools \ + --output-file /requirements.txt \ + /requirements.in && \ + uv pip install --system --prerelease=allow --force-reinstall \ + -r /requirements.txt && \ + uv cache clean && \ /tmp/clean-layer.sh ENV PATH="~/.local/bin:${PATH}" -# Try to force tensorflow to reliably install without breaking other installed deps +# We install a libtpu version compatible with both jax 0.7.2 and torch 2.8.0. +# Why? tunix latest -> flax 0.12 -> jax 0.7.2 -> libtpu 0.0.23. However, that +# libtpu causes pjrt api errors for torch 2.8.0. 
screenshot/5heUtdyaJ4MmR3D +# https://github.com/pytorch/xla/blob/d517649bdef6ab0519c30c704bde8779c8216502/setup.py#L111 +# https://github.com/jax-ml/jax/blob/3489529b38d1f11d1e5caf4540775aadd5f2cdda/setup.py#L26 RUN export PATH="${HOME}/.local/bin:${PATH}" && \ - uv pip freeze --system > /tmp/constraints.txt && \ - uv pip install --system -c /tmp/constraints.txt tensorflow-tpu -f https://storage.googleapis.com/libtpu-tf-releases/index.html --force-reinstall && \ - rm /tmp/constraints.txt + uv pip install --system --force-reinstall libtpu==0.0.17 && \ + uv cache clean && \ + /tmp/clean-layer.sh # Kaggle Model Hub patches: ADD patches/kaggle_module_resolver.py /usr/local/lib/${PYTHON_VERSION_PATH}/site-packages/tensorflow_hub/kaggle_module_resolver.py diff --git a/tpu/requirements.txt b/tpu/requirements.in similarity index 58% rename from tpu/requirements.txt rename to tpu/requirements.in index f31a2e51..1fceeebb 100644 --- a/tpu/requirements.txt +++ b/tpu/requirements.in @@ -1,20 +1,19 @@ # TPU Utils tpu-info # Tensorflow packages -tensorflow-tpu==${TENSORFLOW_VERSION} ---find-links https://storage.googleapis.com/libtpu-tf-releases/index.html +# TODO: b/447621961 - re-enable tensorflow-tpu when a compatible libtpu can be found. +tensorflow-cpu==${TENSORFLOW_VERSION} tensorflow_hub tensorflow-io tensorflow-probability tensorflow_datasets # Torch packages -torch==${TORCH_VERSION} +https://download.pytorch.org/whl/cpu/torch-${TORCH_VERSION}%2Bcpu-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TORCH_LINUX_WHEEL_VERSION}.whl +https://download.pytorch.org/whl/cpu/torchaudio-${TORCHAUDIO_VERSION}%2Bcpu-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TORCH_LINUX_WHEEL_VERSION}.whl +https://download.pytorch.org/whl/cpu/torchvision-${TORCHVISION_VERSION}%2Bcpu-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TORCH_LINUX_WHEEL_VERSION}.whl https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TORCH_VERSION}-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TORCH_LINUX_WHEEL_VERSION}.whl -torchaudio==${TORCHAUDIO_VERSION} -torchvision==${TORCHVISION_VERSION} # Jax packages -jax[tpu]>=0.5.2 ---find-links https://storage.googleapis.com/jax-releases/libtpu_releases.html +jax[tpu] distrax flax git+https://github.com/deepmind/dm-haiku
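For reference, the templated wheel URLs in tpu/requirements.in are expanded by the envsubst step in tpu/Dockerfile before `uv pip compile` resolves them into the pinned /requirements.txt. A minimal sketch of that expansion with illustrative values (the actual versions come from the image's build environment and may differ):

```bash
# Illustrative values only; the real ones are supplied at build time.
export TORCH_VERSION=2.8.0 \
       PYTHON_WHEEL_VERSION=cp312 \
       TORCH_LINUX_WHEEL_VERSION=manylinux_2_28_x86_64
# Single quotes keep ${...} literal so envsubst, not the shell, expands it.
echo 'https://download.pytorch.org/whl/cpu/torch-${TORCH_VERSION}%2Bcpu-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TORCH_LINUX_WHEEL_VERSION}.whl' \
  | envsubst
# -> https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp312-cp312-manylinux_2_28_x86_64.whl
```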